Upstream version 9.38.198.0
[platform/framework/web/crosswalk.git] / src / third_party / ffmpeg / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mjpegenc_common.h"
46 #include "mathops.h"
47 #include "mpegutils.h"
48 #include "mjpegenc.h"
49 #include "msmpeg4.h"
50 #include "pixblockdsp.h"
51 #include "qpeldsp.h"
52 #include "faandct.h"
53 #include "thread.h"
54 #include "aandcttab.h"
55 #include "flv.h"
56 #include "mpeg4video.h"
57 #include "internal.h"
58 #include "bytestream.h"
59 #include <limits.h>
60 #include "sp5x.h"
61
62 #define QUANT_BIAS_SHIFT 8
63
64 #define QMAT_SHIFT_MMX 16
65 #define QMAT_SHIFT 21
66
67 static int encode_picture(MpegEncContext *s, int picture_number);
68 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
69 static int sse_mb(MpegEncContext *s);
70 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
71 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
72
73 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
74 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
75
76 const AVOption ff_mpv_generic_options[] = {
77     FF_MPV_COMMON_OPTS
78     { NULL },
79 };
80
81 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
82                        uint16_t (*qmat16)[2][64],
83                        const uint16_t *quant_matrix,
84                        int bias, int qmin, int qmax, int intra)
85 {
86     FDCTDSPContext *fdsp = &s->fdsp;
87     int qscale;
88     int shift = 0;
89
90     for (qscale = qmin; qscale <= qmax; qscale++) {
91         int i;
92         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
93             fdsp->fdct == ff_jpeg_fdct_islow_10 ||
94             fdsp->fdct == ff_faandct) {
95             for (i = 0; i < 64; i++) {
96                 const int j = s->idsp.idct_permutation[i];
97                 /* 16 <= qscale * quant_matrix[i] <= 7905
98                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
99                  *             19952 <=              x  <= 249205026
100                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
101                  *           3444240 >= (1 << 36) / (x) >= 275 */
102
103                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
104                                         (qscale * quant_matrix[j]));
105             }
106         } else if (fdsp->fdct == ff_fdct_ifast) {
107             for (i = 0; i < 64; i++) {
108                 const int j = s->idsp.idct_permutation[i];
109                 /* 16 <= qscale * quant_matrix[i] <= 7905
110                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
111                  *             19952 <=              x  <= 249205026
112                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
113                  *           3444240 >= (1 << 36) / (x) >= 275 */
114
115                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
116                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
117             }
118         } else {
119             for (i = 0; i < 64; i++) {
120                 const int j = s->idsp.idct_permutation[i];
121                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
122                  * Assume x = qscale * quant_matrix[i]
123                  * So             16 <=              x  <= 7905
124                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
125                  * so          32768 >= (1 << 19) / (x) >= 67 */
126                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
127                                         (qscale * quant_matrix[j]));
128                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
129                 //                    (qscale * quant_matrix[i]);
130                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
131                                        (qscale * quant_matrix[j]);
132
133                 if (qmat16[qscale][0][i] == 0 ||
134                     qmat16[qscale][0][i] == 128 * 256)
135                     qmat16[qscale][0][i] = 128 * 256 - 1;
136                 qmat16[qscale][1][i] =
137                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
138                                 qmat16[qscale][0][i]);
139             }
140         }
141
142         for (i = intra; i < 64; i++) {
143             int64_t max = 8191;
144             if (fdsp->fdct == ff_fdct_ifast) {
145                 max = (8191LL * ff_aanscales[i]) >> 14;
146             }
147             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
148                 shift++;
149             }
150         }
151     }
152     if (shift) {
153         av_log(NULL, AV_LOG_INFO,
154                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
155                QMAT_SHIFT - shift);
156     }
157 }
158
159 static inline void update_qscale(MpegEncContext *s)
160 {
161     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
162                 (FF_LAMBDA_SHIFT + 7);
163     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
164
165     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
166                  FF_LAMBDA_SHIFT;
167 }
168
169 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
170 {
171     int i;
172
173     if (matrix) {
174         put_bits(pb, 1, 1);
175         for (i = 0; i < 64; i++) {
176             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
177         }
178     } else
179         put_bits(pb, 1, 0);
180 }
181
182 /**
183  * init s->current_picture.qscale_table from s->lambda_table
184  */
185 void ff_init_qscale_tab(MpegEncContext *s)
186 {
187     int8_t * const qscale_table = s->current_picture.qscale_table;
188     int i;
189
190     for (i = 0; i < s->mb_num; i++) {
191         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
192         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
193         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
194                                                   s->avctx->qmax);
195     }
196 }
197
198 static void update_duplicate_context_after_me(MpegEncContext *dst,
199                                               MpegEncContext *src)
200 {
201 #define COPY(a) dst->a= src->a
202     COPY(pict_type);
203     COPY(current_picture);
204     COPY(f_code);
205     COPY(b_code);
206     COPY(qscale);
207     COPY(lambda);
208     COPY(lambda2);
209     COPY(picture_in_gop_number);
210     COPY(gop_picture_number);
211     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
212     COPY(progressive_frame);    // FIXME don't set in encode_header
213     COPY(partitioned_frame);    // FIXME don't set in encode_header
214 #undef COPY
215 }
216
217 /**
218  * Set the given MpegEncContext to defaults for encoding.
219  * the changed fields will not depend upon the prior state of the MpegEncContext.
220  */
221 static void MPV_encode_defaults(MpegEncContext *s)
222 {
223     int i;
224     ff_MPV_common_defaults(s);
225
226     for (i = -16; i < 16; i++) {
227         default_fcode_tab[i + MAX_MV] = 1;
228     }
229     s->me.mv_penalty = default_mv_penalty;
230     s->fcode_tab     = default_fcode_tab;
231
232     s->input_picture_number  = 0;
233     s->picture_in_gop_number = 0;
234 }
235
236 av_cold int ff_dct_encode_init(MpegEncContext *s) {
237     if (ARCH_X86)
238         ff_dct_encode_init_x86(s);
239
240     if (CONFIG_H263_ENCODER)
241         ff_h263dsp_init(&s->h263dsp);
242     if (!s->dct_quantize)
243         s->dct_quantize = ff_dct_quantize_c;
244     if (!s->denoise_dct)
245         s->denoise_dct  = denoise_dct_c;
246     s->fast_dct_quantize = s->dct_quantize;
247     if (s->avctx->trellis)
248         s->dct_quantize  = dct_quantize_trellis_c;
249
250     return 0;
251 }
252
253 /* init video encoder */
254 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
255 {
256     MpegEncContext *s = avctx->priv_data;
257     int i, ret, format_supported;
258
259     MPV_encode_defaults(s);
260
261     switch (avctx->codec_id) {
262     case AV_CODEC_ID_MPEG2VIDEO:
263         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
264             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
265             av_log(avctx, AV_LOG_ERROR,
266                    "only YUV420 and YUV422 are supported\n");
267             return -1;
268         }
269         break;
270     case AV_CODEC_ID_MJPEG:
271     case AV_CODEC_ID_AMV:
272         format_supported = 0;
273         /* JPEG color space */
274         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
275             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
276             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
277             (avctx->color_range == AVCOL_RANGE_JPEG &&
278              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
279               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
280               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
281             format_supported = 1;
282         /* MPEG color space */
283         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
284                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
285                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
286                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
287             format_supported = 1;
288
289         if (!format_supported) {
290             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
291             return -1;
292         }
293         break;
294     default:
295         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
296             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
297             return -1;
298         }
299     }
300
301     switch (avctx->pix_fmt) {
302     case AV_PIX_FMT_YUVJ444P:
303     case AV_PIX_FMT_YUV444P:
304         s->chroma_format = CHROMA_444;
305         break;
306     case AV_PIX_FMT_YUVJ422P:
307     case AV_PIX_FMT_YUV422P:
308         s->chroma_format = CHROMA_422;
309         break;
310     case AV_PIX_FMT_YUVJ420P:
311     case AV_PIX_FMT_YUV420P:
312     default:
313         s->chroma_format = CHROMA_420;
314         break;
315     }
316
317     s->bit_rate = avctx->bit_rate;
318     s->width    = avctx->width;
319     s->height   = avctx->height;
320     if (avctx->gop_size > 600 &&
321         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
322         av_log(avctx, AV_LOG_WARNING,
323                "keyframe interval too large!, reducing it from %d to %d\n",
324                avctx->gop_size, 600);
325         avctx->gop_size = 600;
326     }
327     s->gop_size     = avctx->gop_size;
328     s->avctx        = avctx;
329     s->flags        = avctx->flags;
330     s->flags2       = avctx->flags2;
331     if (avctx->max_b_frames > MAX_B_FRAMES) {
332         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
333                "is %d.\n", MAX_B_FRAMES);
334         avctx->max_b_frames = MAX_B_FRAMES;
335     }
336     s->max_b_frames = avctx->max_b_frames;
337     s->codec_id     = avctx->codec->id;
338     s->strict_std_compliance = avctx->strict_std_compliance;
339     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
340     s->mpeg_quant         = avctx->mpeg_quant;
341     s->rtp_mode           = !!avctx->rtp_payload_size;
342     s->intra_dc_precision = avctx->intra_dc_precision;
343
344     // workaround some differences between how applications specify dc precission
345     if (s->intra_dc_precision < 0) {
346         s->intra_dc_precision += 8;
347     } else if (s->intra_dc_precision >= 8)
348         s->intra_dc_precision -= 8;
349
350     if (s->intra_dc_precision < 0) {
351         av_log(avctx, AV_LOG_ERROR,
352                 "intra dc precision must be positive, note some applications use"
353                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
354         return AVERROR(EINVAL);
355     }
356
357     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
358         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
359         return AVERROR(EINVAL);
360     }
361     s->user_specified_pts = AV_NOPTS_VALUE;
362
363     if (s->gop_size <= 1) {
364         s->intra_only = 1;
365         s->gop_size   = 12;
366     } else {
367         s->intra_only = 0;
368     }
369
370     s->me_method = avctx->me_method;
371
372     /* Fixed QSCALE */
373     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
374
375     s->adaptive_quant = (s->avctx->lumi_masking ||
376                          s->avctx->dark_masking ||
377                          s->avctx->temporal_cplx_masking ||
378                          s->avctx->spatial_cplx_masking  ||
379                          s->avctx->p_masking      ||
380                          s->avctx->border_masking ||
381                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
382                         !s->fixed_qscale;
383
384     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
385
386     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
387         switch(avctx->codec_id) {
388         case AV_CODEC_ID_MPEG1VIDEO:
389         case AV_CODEC_ID_MPEG2VIDEO:
390             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
391             break;
392         case AV_CODEC_ID_MPEG4:
393         case AV_CODEC_ID_MSMPEG4V1:
394         case AV_CODEC_ID_MSMPEG4V2:
395         case AV_CODEC_ID_MSMPEG4V3:
396             if       (avctx->rc_max_rate >= 15000000) {
397                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
398             } else if(avctx->rc_max_rate >=  2000000) {
399                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
400             } else if(avctx->rc_max_rate >=   384000) {
401                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
402             } else
403                 avctx->rc_buffer_size = 40;
404             avctx->rc_buffer_size *= 16384;
405             break;
406         }
407         if (avctx->rc_buffer_size) {
408             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
409         }
410     }
411
412     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
413         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
414         return -1;
415     }
416
417     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
418         av_log(avctx, AV_LOG_INFO,
419                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
420     }
421
422     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
423         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
424         return -1;
425     }
426
427     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
428         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
429         return -1;
430     }
431
432     if (avctx->rc_max_rate &&
433         avctx->rc_max_rate == avctx->bit_rate &&
434         avctx->rc_max_rate != avctx->rc_min_rate) {
435         av_log(avctx, AV_LOG_INFO,
436                "impossible bitrate constraints, this will fail\n");
437     }
438
439     if (avctx->rc_buffer_size &&
440         avctx->bit_rate * (int64_t)avctx->time_base.num >
441             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
442         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
443         return -1;
444     }
445
446     if (!s->fixed_qscale &&
447         avctx->bit_rate * av_q2d(avctx->time_base) >
448             avctx->bit_rate_tolerance) {
449         av_log(avctx, AV_LOG_WARNING,
450                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
451         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
452     }
453
454     if (s->avctx->rc_max_rate &&
455         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
456         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
457          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
458         90000LL * (avctx->rc_buffer_size - 1) >
459             s->avctx->rc_max_rate * 0xFFFFLL) {
460         av_log(avctx, AV_LOG_INFO,
461                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
462                "specified vbv buffer is too large for the given bitrate!\n");
463     }
464
465     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
466         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
467         s->codec_id != AV_CODEC_ID_FLV1) {
468         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
469         return -1;
470     }
471
472     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
473         av_log(avctx, AV_LOG_ERROR,
474                "OBMC is only supported with simple mb decision\n");
475         return -1;
476     }
477
478     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
479         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
480         return -1;
481     }
482
483     if (s->max_b_frames                    &&
484         s->codec_id != AV_CODEC_ID_MPEG4      &&
485         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
486         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
487         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
488         return -1;
489     }
490     if (s->max_b_frames < 0) {
491         av_log(avctx, AV_LOG_ERROR,
492                "max b frames must be 0 or positive for mpegvideo based encoders\n");
493         return -1;
494     }
495
496     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
497          s->codec_id == AV_CODEC_ID_H263  ||
498          s->codec_id == AV_CODEC_ID_H263P) &&
499         (avctx->sample_aspect_ratio.num > 255 ||
500          avctx->sample_aspect_ratio.den > 255)) {
501         av_log(avctx, AV_LOG_WARNING,
502                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
503                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
504         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
505                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
506     }
507
508     if ((s->codec_id == AV_CODEC_ID_H263  ||
509          s->codec_id == AV_CODEC_ID_H263P) &&
510         (avctx->width  > 2048 ||
511          avctx->height > 1152 )) {
512         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
513         return -1;
514     }
515     if ((s->codec_id == AV_CODEC_ID_H263  ||
516          s->codec_id == AV_CODEC_ID_H263P) &&
517         ((avctx->width &3) ||
518          (avctx->height&3) )) {
519         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
520         return -1;
521     }
522
523     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
524         (avctx->width  > 4095 ||
525          avctx->height > 4095 )) {
526         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
527         return -1;
528     }
529
530     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
531         (avctx->width  > 16383 ||
532          avctx->height > 16383 )) {
533         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
534         return -1;
535     }
536
537     if (s->codec_id == AV_CODEC_ID_RV10 &&
538         (avctx->width &15 ||
539          avctx->height&15 )) {
540         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
541         return AVERROR(EINVAL);
542     }
543
544     if (s->codec_id == AV_CODEC_ID_RV20 &&
545         (avctx->width &3 ||
546          avctx->height&3 )) {
547         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
548         return AVERROR(EINVAL);
549     }
550
551     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
552          s->codec_id == AV_CODEC_ID_WMV2) &&
553          avctx->width & 1) {
554          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
555          return -1;
556     }
557
558     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
559         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
560         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
561         return -1;
562     }
563
564     // FIXME mpeg2 uses that too
565     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
566                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
567         av_log(avctx, AV_LOG_ERROR,
568                "mpeg2 style quantization not supported by codec\n");
569         return -1;
570     }
571
572     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
573         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
574         return -1;
575     }
576
577     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
578         s->avctx->mb_decision != FF_MB_DECISION_RD) {
579         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
580         return -1;
581     }
582
583     if (s->avctx->scenechange_threshold < 1000000000 &&
584         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
585         av_log(avctx, AV_LOG_ERROR,
586                "closed gop with scene change detection are not supported yet, "
587                "set threshold to 1000000000\n");
588         return -1;
589     }
590
591     if (s->flags & CODEC_FLAG_LOW_DELAY) {
592         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
593             av_log(avctx, AV_LOG_ERROR,
594                   "low delay forcing is only available for mpeg2\n");
595             return -1;
596         }
597         if (s->max_b_frames != 0) {
598             av_log(avctx, AV_LOG_ERROR,
599                    "b frames cannot be used with low delay\n");
600             return -1;
601         }
602     }
603
604     if (s->q_scale_type == 1) {
605         if (avctx->qmax > 12) {
606             av_log(avctx, AV_LOG_ERROR,
607                    "non linear quant only supports qmax <= 12 currently\n");
608             return -1;
609         }
610     }
611
612     if (s->avctx->thread_count > 1         &&
613         s->codec_id != AV_CODEC_ID_MPEG4      &&
614         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
615         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
616         s->codec_id != AV_CODEC_ID_MJPEG      &&
617         (s->codec_id != AV_CODEC_ID_H263P)) {
618         av_log(avctx, AV_LOG_ERROR,
619                "multi threaded encoding not supported by codec\n");
620         return -1;
621     }
622
623     if (s->avctx->thread_count < 1) {
624         av_log(avctx, AV_LOG_ERROR,
625                "automatic thread number detection not supported by codec, "
626                "patch welcome\n");
627         return -1;
628     }
629
630     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
631         s->rtp_mode = 1;
632
633     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
634         s->h263_slice_structured = 1;
635
636     if (!avctx->time_base.den || !avctx->time_base.num) {
637         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
638         return -1;
639     }
640
641     i = (INT_MAX / 2 + 128) >> 8;
642     if (avctx->mb_threshold >= i) {
643         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
644                i - 1);
645         return -1;
646     }
647
648     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
649         av_log(avctx, AV_LOG_INFO,
650                "notice: b_frame_strategy only affects the first pass\n");
651         avctx->b_frame_strategy = 0;
652     }
653
654     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
655     if (i > 1) {
656         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
657         avctx->time_base.den /= i;
658         avctx->time_base.num /= i;
659         //return -1;
660     }
661
662     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
663         // (a + x * 3 / 8) / x
664         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
665         s->inter_quant_bias = 0;
666     } else {
667         s->intra_quant_bias = 0;
668         // (a - x / 4) / x
669         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
670     }
671
672     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
673         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
674         return AVERROR(EINVAL);
675     }
676
677     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
678         s->intra_quant_bias = avctx->intra_quant_bias;
679     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
680         s->inter_quant_bias = avctx->inter_quant_bias;
681
682     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
683
684     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
685         s->avctx->time_base.den > (1 << 16) - 1) {
686         av_log(avctx, AV_LOG_ERROR,
687                "timebase %d/%d not supported by MPEG 4 standard, "
688                "the maximum admitted value for the timebase denominator "
689                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
690                (1 << 16) - 1);
691         return -1;
692     }
693     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
694
695     switch (avctx->codec->id) {
696     case AV_CODEC_ID_MPEG1VIDEO:
697         s->out_format = FMT_MPEG1;
698         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
699         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
700         break;
701     case AV_CODEC_ID_MPEG2VIDEO:
702         s->out_format = FMT_MPEG1;
703         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
704         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
705         s->rtp_mode   = 1;
706         break;
707     case AV_CODEC_ID_MJPEG:
708     case AV_CODEC_ID_AMV:
709         s->out_format = FMT_MJPEG;
710         s->intra_only = 1; /* force intra only for jpeg */
711         if (!CONFIG_MJPEG_ENCODER ||
712             ff_mjpeg_encode_init(s) < 0)
713             return -1;
714         avctx->delay = 0;
715         s->low_delay = 1;
716         break;
717     case AV_CODEC_ID_H261:
718         if (!CONFIG_H261_ENCODER)
719             return -1;
720         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
721             av_log(avctx, AV_LOG_ERROR,
722                    "The specified picture size of %dx%d is not valid for the "
723                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
724                     s->width, s->height);
725             return -1;
726         }
727         s->out_format = FMT_H261;
728         avctx->delay  = 0;
729         s->low_delay  = 1;
730         break;
731     case AV_CODEC_ID_H263:
732         if (!CONFIG_H263_ENCODER)
733             return -1;
734         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
735                              s->width, s->height) == 8) {
736             av_log(avctx, AV_LOG_ERROR,
737                    "The specified picture size of %dx%d is not valid for "
738                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
739                    "352x288, 704x576, and 1408x1152. "
740                    "Try H.263+.\n", s->width, s->height);
741             return -1;
742         }
743         s->out_format = FMT_H263;
744         avctx->delay  = 0;
745         s->low_delay  = 1;
746         break;
747     case AV_CODEC_ID_H263P:
748         s->out_format = FMT_H263;
749         s->h263_plus  = 1;
750         /* Fx */
751         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
752         s->modified_quant  = s->h263_aic;
753         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
754         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
755
756         /* /Fx */
757         /* These are just to be sure */
758         avctx->delay = 0;
759         s->low_delay = 1;
760         break;
761     case AV_CODEC_ID_FLV1:
762         s->out_format      = FMT_H263;
763         s->h263_flv        = 2; /* format = 1; 11-bit codes */
764         s->unrestricted_mv = 1;
765         s->rtp_mode  = 0; /* don't allow GOB */
766         avctx->delay = 0;
767         s->low_delay = 1;
768         break;
769     case AV_CODEC_ID_RV10:
770         s->out_format = FMT_H263;
771         avctx->delay  = 0;
772         s->low_delay  = 1;
773         break;
774     case AV_CODEC_ID_RV20:
775         s->out_format      = FMT_H263;
776         avctx->delay       = 0;
777         s->low_delay       = 1;
778         s->modified_quant  = 1;
779         s->h263_aic        = 1;
780         s->h263_plus       = 1;
781         s->loop_filter     = 1;
782         s->unrestricted_mv = 0;
783         break;
784     case AV_CODEC_ID_MPEG4:
785         s->out_format      = FMT_H263;
786         s->h263_pred       = 1;
787         s->unrestricted_mv = 1;
788         s->low_delay       = s->max_b_frames ? 0 : 1;
789         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
790         break;
791     case AV_CODEC_ID_MSMPEG4V2:
792         s->out_format      = FMT_H263;
793         s->h263_pred       = 1;
794         s->unrestricted_mv = 1;
795         s->msmpeg4_version = 2;
796         avctx->delay       = 0;
797         s->low_delay       = 1;
798         break;
799     case AV_CODEC_ID_MSMPEG4V3:
800         s->out_format        = FMT_H263;
801         s->h263_pred         = 1;
802         s->unrestricted_mv   = 1;
803         s->msmpeg4_version   = 3;
804         s->flipflop_rounding = 1;
805         avctx->delay         = 0;
806         s->low_delay         = 1;
807         break;
808     case AV_CODEC_ID_WMV1:
809         s->out_format        = FMT_H263;
810         s->h263_pred         = 1;
811         s->unrestricted_mv   = 1;
812         s->msmpeg4_version   = 4;
813         s->flipflop_rounding = 1;
814         avctx->delay         = 0;
815         s->low_delay         = 1;
816         break;
817     case AV_CODEC_ID_WMV2:
818         s->out_format        = FMT_H263;
819         s->h263_pred         = 1;
820         s->unrestricted_mv   = 1;
821         s->msmpeg4_version   = 5;
822         s->flipflop_rounding = 1;
823         avctx->delay         = 0;
824         s->low_delay         = 1;
825         break;
826     default:
827         return -1;
828     }
829
830     avctx->has_b_frames = !s->low_delay;
831
832     s->encoding = 1;
833
834     s->progressive_frame    =
835     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
836                                                 CODEC_FLAG_INTERLACED_ME) ||
837                                 s->alternate_scan);
838
839     /* init */
840     if (ff_MPV_common_init(s) < 0)
841         return -1;
842
843     ff_fdctdsp_init(&s->fdsp, avctx);
844     ff_me_cmp_init(&s->mecc, avctx);
845     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
846     ff_pixblockdsp_init(&s->pdsp, avctx);
847     ff_qpeldsp_init(&s->qdsp);
848
849     s->avctx->coded_frame = s->current_picture.f;
850
851     if (s->msmpeg4_version) {
852         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
853                           2 * 2 * (MAX_LEVEL + 1) *
854                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
855     }
856     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
857
858     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
859     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
860     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
861     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
862     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
863     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
864     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
865                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
866     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
867                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
868
869     if (s->avctx->noise_reduction) {
870         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
871                           2 * 64 * sizeof(uint16_t), fail);
872     }
873
874     ff_dct_encode_init(s);
875
876     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
877         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
878
879     s->quant_precision = 5;
880
881     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
882     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
883
884     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
885         ff_h261_encode_init(s);
886     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
887         ff_h263_encode_init(s);
888     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
889         ff_msmpeg4_encode_init(s);
890     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
891         && s->out_format == FMT_MPEG1)
892         ff_mpeg1_encode_init(s);
893
894     /* init q matrix */
895     for (i = 0; i < 64; i++) {
896         int j = s->idsp.idct_permutation[i];
897         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
898             s->mpeg_quant) {
899             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
900             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
901         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
902             s->intra_matrix[j] =
903             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
904         } else {
905             /* mpeg1/2 */
906             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
907             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
908         }
909         if (s->avctx->intra_matrix)
910             s->intra_matrix[j] = s->avctx->intra_matrix[i];
911         if (s->avctx->inter_matrix)
912             s->inter_matrix[j] = s->avctx->inter_matrix[i];
913     }
914
915     /* precompute matrix */
916     /* for mjpeg, we do include qscale in the matrix */
917     if (s->out_format != FMT_MJPEG) {
918         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
919                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
920                           31, 1);
921         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
922                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
923                           31, 0);
924     }
925
926     if (ff_rate_control_init(s) < 0)
927         return -1;
928
929 #if FF_API_ERROR_RATE
930     FF_DISABLE_DEPRECATION_WARNINGS
931     if (avctx->error_rate)
932         s->error_rate = avctx->error_rate;
933     FF_ENABLE_DEPRECATION_WARNINGS;
934 #endif
935
936 #if FF_API_NORMALIZE_AQP
937     FF_DISABLE_DEPRECATION_WARNINGS
938     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
939         s->mpv_flags |= FF_MPV_FLAG_NAQ;
940     FF_ENABLE_DEPRECATION_WARNINGS;
941 #endif
942
943 #if FF_API_MV0
944     FF_DISABLE_DEPRECATION_WARNINGS
945     if (avctx->flags & CODEC_FLAG_MV0)
946         s->mpv_flags |= FF_MPV_FLAG_MV0;
947     FF_ENABLE_DEPRECATION_WARNINGS
948 #endif
949
950     if (avctx->b_frame_strategy == 2) {
951         for (i = 0; i < s->max_b_frames + 2; i++) {
952             s->tmp_frames[i] = av_frame_alloc();
953             if (!s->tmp_frames[i])
954                 return AVERROR(ENOMEM);
955
956             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
957             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
958             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
959
960             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
961             if (ret < 0)
962                 return ret;
963         }
964     }
965
966     return 0;
967 fail:
968     ff_MPV_encode_end(avctx);
969     return AVERROR_UNKNOWN;
970 }
971
972 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
973 {
974     MpegEncContext *s = avctx->priv_data;
975     int i;
976
977     ff_rate_control_uninit(s);
978
979     ff_MPV_common_end(s);
980     if (CONFIG_MJPEG_ENCODER &&
981         s->out_format == FMT_MJPEG)
982         ff_mjpeg_encode_close(s);
983
984     av_freep(&avctx->extradata);
985
986     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
987         av_frame_free(&s->tmp_frames[i]);
988
989     ff_free_picture_tables(&s->new_picture);
990     ff_mpeg_unref_picture(s, &s->new_picture);
991
992     av_freep(&s->avctx->stats_out);
993     av_freep(&s->ac_stats);
994
995     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
996     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
997     s->q_chroma_intra_matrix=   NULL;
998     s->q_chroma_intra_matrix16= NULL;
999     av_freep(&s->q_intra_matrix);
1000     av_freep(&s->q_inter_matrix);
1001     av_freep(&s->q_intra_matrix16);
1002     av_freep(&s->q_inter_matrix16);
1003     av_freep(&s->input_picture);
1004     av_freep(&s->reordered_input_picture);
1005     av_freep(&s->dct_offset);
1006
1007     return 0;
1008 }
1009
/**
 * Sum of absolute differences between every pixel of a 16x16 block and a
 * single reference value.
 *
 * @param src    top-left pixel of the block
 * @param ref    value each pixel is compared against
 * @param stride distance in bytes between consecutive rows of src
 * @return accumulated absolute error over the 256 pixels
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
1023
1024 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1025                            uint8_t *ref, int stride)
1026 {
1027     int x, y, w, h;
1028     int acc = 0;
1029
1030     w = s->width  & ~15;
1031     h = s->height & ~15;
1032
1033     for (y = 0; y < h; y += 16) {
1034         for (x = 0; x < w; x += 16) {
1035             int offset = x + y * stride;
1036             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1037                                       stride, 16);
1038             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1039             int sae  = get_sae(src + offset, mean, stride);
1040
1041             acc += sae + 500 < sad;
1042         }
1043     }
1044     return acc;
1045 }
1046
1047
1048 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1049 {
1050     Picture *pic = NULL;
1051     int64_t pts;
1052     int i, display_picture_number = 0, ret;
1053     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1054                                                  (s->low_delay ? 0 : 1);
1055     int direct = 1;
1056
1057     if (pic_arg) {
1058         pts = pic_arg->pts;
1059         display_picture_number = s->input_picture_number++;
1060
1061         if (pts != AV_NOPTS_VALUE) {
1062             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1063                 int64_t last = s->user_specified_pts;
1064
1065                 if (pts <= last) {
1066                     av_log(s->avctx, AV_LOG_ERROR,
1067                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1068                            pts, last);
1069                     return AVERROR(EINVAL);
1070                 }
1071
1072                 if (!s->low_delay && display_picture_number == 1)
1073                     s->dts_delta = pts - last;
1074             }
1075             s->user_specified_pts = pts;
1076         } else {
1077             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1078                 s->user_specified_pts =
1079                 pts = s->user_specified_pts + 1;
1080                 av_log(s->avctx, AV_LOG_INFO,
1081                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1082                        pts);
1083             } else {
1084                 pts = display_picture_number;
1085             }
1086         }
1087     }
1088
1089     if (pic_arg) {
1090         if (!pic_arg->buf[0])
1091             direct = 0;
1092         if (pic_arg->linesize[0] != s->linesize)
1093             direct = 0;
1094         if (pic_arg->linesize[1] != s->uvlinesize)
1095             direct = 0;
1096         if (pic_arg->linesize[2] != s->uvlinesize)
1097             direct = 0;
1098         if ((s->width & 15) || (s->height & 15))
1099             direct = 0;
1100         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1101             direct = 0;
1102         if (s->linesize & (STRIDE_ALIGN-1))
1103             direct = 0;
1104
1105         av_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1106                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1107
1108         if (direct) {
1109             i = ff_find_unused_picture(s, 1);
1110             if (i < 0)
1111                 return i;
1112
1113             pic = &s->picture[i];
1114             pic->reference = 3;
1115
1116             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1117                 return ret;
1118             if (ff_alloc_picture(s, pic, 1) < 0) {
1119                 return -1;
1120             }
1121         } else {
1122             i = ff_find_unused_picture(s, 0);
1123             if (i < 0)
1124                 return i;
1125
1126             pic = &s->picture[i];
1127             pic->reference = 3;
1128
1129             if (ff_alloc_picture(s, pic, 0) < 0) {
1130                 return -1;
1131             }
1132
1133             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1134                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1135                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1136                 // empty
1137             } else {
1138                 int h_chroma_shift, v_chroma_shift;
1139                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1140                                                  &h_chroma_shift,
1141                                                  &v_chroma_shift);
1142
1143                 for (i = 0; i < 3; i++) {
1144                     int src_stride = pic_arg->linesize[i];
1145                     int dst_stride = i ? s->uvlinesize : s->linesize;
1146                     int h_shift = i ? h_chroma_shift : 0;
1147                     int v_shift = i ? v_chroma_shift : 0;
1148                     int w = s->width  >> h_shift;
1149                     int h = s->height >> v_shift;
1150                     uint8_t *src = pic_arg->data[i];
1151                     uint8_t *dst = pic->f->data[i];
1152                     int vpad = 16;
1153
1154                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1155                         && !s->progressive_sequence
1156                         && FFALIGN(s->height, 32) - s->height > 16)
1157                         vpad = 32;
1158
1159                     if (!s->avctx->rc_buffer_size)
1160                         dst += INPLACE_OFFSET;
1161
1162                     if (src_stride == dst_stride)
1163                         memcpy(dst, src, src_stride * h);
1164                     else {
1165                         int h2 = h;
1166                         uint8_t *dst2 = dst;
1167                         while (h2--) {
1168                             memcpy(dst2, src, w);
1169                             dst2 += dst_stride;
1170                             src += src_stride;
1171                         }
1172                     }
1173                     if ((s->width & 15) || (s->height & (vpad-1))) {
1174                         s->mpvencdsp.draw_edges(dst, dst_stride,
1175                                                 w, h,
1176                                                 16>>h_shift,
1177                                                 vpad>>v_shift,
1178                                                 EDGE_BOTTOM);
1179                     }
1180                 }
1181             }
1182         }
1183         ret = av_frame_copy_props(pic->f, pic_arg);
1184         if (ret < 0)
1185             return ret;
1186
1187         pic->f->display_picture_number = display_picture_number;
1188         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1189     }
1190
1191     /* shift buffer entries */
1192     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1193         s->input_picture[i - 1] = s->input_picture[i];
1194
1195     s->input_picture[encoding_delay] = (Picture*) pic;
1196
1197     return 0;
1198 }
1199
1200 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1201 {
1202     int x, y, plane;
1203     int score = 0;
1204     int64_t score64 = 0;
1205
1206     for (plane = 0; plane < 3; plane++) {
1207         const int stride = p->f->linesize[plane];
1208         const int bw = plane ? 1 : 2;
1209         for (y = 0; y < s->mb_height * bw; y++) {
1210             for (x = 0; x < s->mb_width * bw; x++) {
1211                 int off = p->shared ? 0 : 16;
1212                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1213                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1214                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1215
1216                 switch (FFABS(s->avctx->frame_skip_exp)) {
1217                 case 0: score    =  FFMAX(score, v);          break;
1218                 case 1: score   += FFABS(v);                  break;
1219                 case 2: score64 += v * (int64_t)v;                       break;
1220                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1221                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1222                 }
1223             }
1224         }
1225     }
1226     emms_c();
1227
1228     if (score)
1229         score64 = score;
1230     if (s->avctx->frame_skip_exp < 0)
1231         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1232                       -1.0/s->avctx->frame_skip_exp);
1233
1234     if (score64 < s->avctx->frame_skip_threshold)
1235         return 1;
1236     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1237         return 1;
1238     return 0;
1239 }
1240
1241 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1242 {
1243     AVPacket pkt = { 0 };
1244     int ret, got_output;
1245
1246     av_init_packet(&pkt);
1247     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1248     if (ret < 0)
1249         return ret;
1250
1251     ret = pkt.size;
1252     av_free_packet(&pkt);
1253     return ret;
1254 }
1255
1256 static int estimate_best_b_count(MpegEncContext *s)
1257 {
1258     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1259     AVCodecContext *c = avcodec_alloc_context3(NULL);
1260     const int scale = s->avctx->brd_scale;
1261     int i, j, out_size, p_lambda, b_lambda, lambda2;
1262     int64_t best_rd  = INT64_MAX;
1263     int best_b_count = -1;
1264
1265     av_assert0(scale >= 0 && scale <= 3);
1266
1267     //emms_c();
1268     //s->next_picture_ptr->quality;
1269     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1270     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1271     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1272     if (!b_lambda) // FIXME we should do this somewhere else
1273         b_lambda = p_lambda;
1274     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1275                FF_LAMBDA_SHIFT;
1276
1277     c->width        = s->width  >> scale;
1278     c->height       = s->height >> scale;
1279     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1280     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1281     c->mb_decision  = s->avctx->mb_decision;
1282     c->me_cmp       = s->avctx->me_cmp;
1283     c->mb_cmp       = s->avctx->mb_cmp;
1284     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1285     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1286     c->time_base    = s->avctx->time_base;
1287     c->max_b_frames = s->max_b_frames;
1288
1289     if (avcodec_open2(c, codec, NULL) < 0)
1290         return -1;
1291
1292     for (i = 0; i < s->max_b_frames + 2; i++) {
1293         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1294                                                 s->next_picture_ptr;
1295         uint8_t *data[4];
1296
1297         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1298             pre_input = *pre_input_ptr;
1299             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1300
1301             if (!pre_input.shared && i) {
1302                 data[0] += INPLACE_OFFSET;
1303                 data[1] += INPLACE_OFFSET;
1304                 data[2] += INPLACE_OFFSET;
1305             }
1306
1307             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1308                                        s->tmp_frames[i]->linesize[0],
1309                                        data[0],
1310                                        pre_input.f->linesize[0],
1311                                        c->width, c->height);
1312             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1313                                        s->tmp_frames[i]->linesize[1],
1314                                        data[1],
1315                                        pre_input.f->linesize[1],
1316                                        c->width >> 1, c->height >> 1);
1317             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1318                                        s->tmp_frames[i]->linesize[2],
1319                                        data[2],
1320                                        pre_input.f->linesize[2],
1321                                        c->width >> 1, c->height >> 1);
1322         }
1323     }
1324
1325     for (j = 0; j < s->max_b_frames + 1; j++) {
1326         int64_t rd = 0;
1327
1328         if (!s->input_picture[j])
1329             break;
1330
1331         c->error[0] = c->error[1] = c->error[2] = 0;
1332
1333         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1334         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1335
1336         out_size = encode_frame(c, s->tmp_frames[0]);
1337
1338         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1339
1340         for (i = 0; i < s->max_b_frames + 1; i++) {
1341             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1342
1343             s->tmp_frames[i + 1]->pict_type = is_p ?
1344                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1345             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1346
1347             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1348
1349             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1350         }
1351
1352         /* get the delayed frames */
1353         while (out_size) {
1354             out_size = encode_frame(c, NULL);
1355             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1356         }
1357
1358         rd += c->error[0] + c->error[1] + c->error[2];
1359
1360         if (rd < best_rd) {
1361             best_rd = rd;
1362             best_b_count = j;
1363         }
1364     }
1365
1366     avcodec_close(c);
1367     av_freep(&c);
1368
1369     return best_b_count;
1370 }
1371
1372 static int select_input_picture(MpegEncContext *s)
1373 {
1374     int i, ret;
1375
1376     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1377         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1378     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1379
1380     /* set next picture type & ordering */
1381     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1382         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1383             if (s->picture_in_gop_number < s->gop_size &&
1384                 s->next_picture_ptr &&
1385                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1386                 // FIXME check that te gop check above is +-1 correct
1387                 av_frame_unref(s->input_picture[0]->f);
1388
1389                 ff_vbv_update(s, 0);
1390
1391                 goto no_output_pic;
1392             }
1393         }
1394
1395         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1396             s->next_picture_ptr == NULL || s->intra_only) {
1397             s->reordered_input_picture[0] = s->input_picture[0];
1398             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1399             s->reordered_input_picture[0]->f->coded_picture_number =
1400                 s->coded_picture_number++;
1401         } else {
1402             int b_frames;
1403
1404             if (s->flags & CODEC_FLAG_PASS2) {
1405                 for (i = 0; i < s->max_b_frames + 1; i++) {
1406                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1407
1408                     if (pict_num >= s->rc_context.num_entries)
1409                         break;
1410                     if (!s->input_picture[i]) {
1411                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1412                         break;
1413                     }
1414
1415                     s->input_picture[i]->f->pict_type =
1416                         s->rc_context.entry[pict_num].new_pict_type;
1417                 }
1418             }
1419
1420             if (s->avctx->b_frame_strategy == 0) {
1421                 b_frames = s->max_b_frames;
1422                 while (b_frames && !s->input_picture[b_frames])
1423                     b_frames--;
1424             } else if (s->avctx->b_frame_strategy == 1) {
1425                 for (i = 1; i < s->max_b_frames + 1; i++) {
1426                     if (s->input_picture[i] &&
1427                         s->input_picture[i]->b_frame_score == 0) {
1428                         s->input_picture[i]->b_frame_score =
1429                             get_intra_count(s,
1430                                             s->input_picture[i    ]->f->data[0],
1431                                             s->input_picture[i - 1]->f->data[0],
1432                                             s->linesize) + 1;
1433                     }
1434                 }
1435                 for (i = 0; i < s->max_b_frames + 1; i++) {
1436                     if (s->input_picture[i] == NULL ||
1437                         s->input_picture[i]->b_frame_score - 1 >
1438                             s->mb_num / s->avctx->b_sensitivity)
1439                         break;
1440                 }
1441
1442                 b_frames = FFMAX(0, i - 1);
1443
1444                 /* reset scores */
1445                 for (i = 0; i < b_frames + 1; i++) {
1446                     s->input_picture[i]->b_frame_score = 0;
1447                 }
1448             } else if (s->avctx->b_frame_strategy == 2) {
1449                 b_frames = estimate_best_b_count(s);
1450             } else {
1451                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1452                 b_frames = 0;
1453             }
1454
1455             emms_c();
1456
1457             for (i = b_frames - 1; i >= 0; i--) {
1458                 int type = s->input_picture[i]->f->pict_type;
1459                 if (type && type != AV_PICTURE_TYPE_B)
1460                     b_frames = i;
1461             }
1462             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1463                 b_frames == s->max_b_frames) {
1464                 av_log(s->avctx, AV_LOG_ERROR,
1465                        "warning, too many b frames in a row\n");
1466             }
1467
1468             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1469                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1470                     s->gop_size > s->picture_in_gop_number) {
1471                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1472                 } else {
1473                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1474                         b_frames = 0;
1475                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1476                 }
1477             }
1478
1479             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1480                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1481                 b_frames--;
1482
1483             s->reordered_input_picture[0] = s->input_picture[b_frames];
1484             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1485                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1486             s->reordered_input_picture[0]->f->coded_picture_number =
1487                 s->coded_picture_number++;
1488             for (i = 0; i < b_frames; i++) {
1489                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1490                 s->reordered_input_picture[i + 1]->f->pict_type =
1491                     AV_PICTURE_TYPE_B;
1492                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1493                     s->coded_picture_number++;
1494             }
1495         }
1496     }
1497 no_output_pic:
1498     if (s->reordered_input_picture[0]) {
1499         s->reordered_input_picture[0]->reference =
1500            s->reordered_input_picture[0]->f->pict_type !=
1501                AV_PICTURE_TYPE_B ? 3 : 0;
1502
1503         ff_mpeg_unref_picture(s, &s->new_picture);
1504         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1505             return ret;
1506
1507         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1508             // input is a shared pix, so we can't modifiy it -> alloc a new
1509             // one & ensure that the shared one is reuseable
1510
1511             Picture *pic;
1512             int i = ff_find_unused_picture(s, 0);
1513             if (i < 0)
1514                 return i;
1515             pic = &s->picture[i];
1516
1517             pic->reference = s->reordered_input_picture[0]->reference;
1518             if (ff_alloc_picture(s, pic, 0) < 0) {
1519                 return -1;
1520             }
1521
1522             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1523             if (ret < 0)
1524                 return ret;
1525
1526             /* mark us unused / free shared pic */
1527             av_frame_unref(s->reordered_input_picture[0]->f);
1528             s->reordered_input_picture[0]->shared = 0;
1529
1530             s->current_picture_ptr = pic;
1531         } else {
1532             // input is not a shared pix -> reuse buffer for current_pix
1533             s->current_picture_ptr = s->reordered_input_picture[0];
1534             for (i = 0; i < 4; i++) {
1535                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1536             }
1537         }
1538         ff_mpeg_unref_picture(s, &s->current_picture);
1539         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1540                                        s->current_picture_ptr)) < 0)
1541             return ret;
1542
1543         s->picture_number = s->new_picture.f->display_picture_number;
1544     } else {
1545         ff_mpeg_unref_picture(s, &s->new_picture);
1546     }
1547     return 0;
1548 }
1549
1550 static void frame_end(MpegEncContext *s)
1551 {
1552     if (s->unrestricted_mv &&
1553         s->current_picture.reference &&
1554         !s->intra_only) {
1555         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1556         int hshift = desc->log2_chroma_w;
1557         int vshift = desc->log2_chroma_h;
1558         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1559                                 s->current_picture.f->linesize[0],
1560                                 s->h_edge_pos, s->v_edge_pos,
1561                                 EDGE_WIDTH, EDGE_WIDTH,
1562                                 EDGE_TOP | EDGE_BOTTOM);
1563         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1564                                 s->current_picture.f->linesize[1],
1565                                 s->h_edge_pos >> hshift,
1566                                 s->v_edge_pos >> vshift,
1567                                 EDGE_WIDTH >> hshift,
1568                                 EDGE_WIDTH >> vshift,
1569                                 EDGE_TOP | EDGE_BOTTOM);
1570         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1571                                 s->current_picture.f->linesize[2],
1572                                 s->h_edge_pos >> hshift,
1573                                 s->v_edge_pos >> vshift,
1574                                 EDGE_WIDTH >> hshift,
1575                                 EDGE_WIDTH >> vshift,
1576                                 EDGE_TOP | EDGE_BOTTOM);
1577     }
1578
1579     emms_c();
1580
1581     s->last_pict_type                 = s->pict_type;
1582     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1583     if (s->pict_type!= AV_PICTURE_TYPE_B)
1584         s->last_non_b_pict_type = s->pict_type;
1585
1586     s->avctx->coded_frame = s->current_picture_ptr->f;
1587
1588 }
1589
1590 static void update_noise_reduction(MpegEncContext *s)
1591 {
1592     int intra, i;
1593
1594     for (intra = 0; intra < 2; intra++) {
1595         if (s->dct_count[intra] > (1 << 16)) {
1596             for (i = 0; i < 64; i++) {
1597                 s->dct_error_sum[intra][i] >>= 1;
1598             }
1599             s->dct_count[intra] >>= 1;
1600         }
1601
1602         for (i = 0; i < 64; i++) {
1603             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1604                                        s->dct_count[intra] +
1605                                        s->dct_error_sum[intra][i] / 2) /
1606                                       (s->dct_error_sum[intra][i] + 1);
1607         }
1608     }
1609 }
1610
1611 static int frame_start(MpegEncContext *s)
1612 {
1613     int ret;
1614
1615     /* mark & release old frames */
1616     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1617         s->last_picture_ptr != s->next_picture_ptr &&
1618         s->last_picture_ptr->f->buf[0]) {
1619         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1620     }
1621
1622     s->current_picture_ptr->f->pict_type = s->pict_type;
1623     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1624
1625     ff_mpeg_unref_picture(s, &s->current_picture);
1626     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1627                                    s->current_picture_ptr)) < 0)
1628         return ret;
1629
1630     if (s->pict_type != AV_PICTURE_TYPE_B) {
1631         s->last_picture_ptr = s->next_picture_ptr;
1632         if (!s->droppable)
1633             s->next_picture_ptr = s->current_picture_ptr;
1634     }
1635
1636     if (s->last_picture_ptr) {
1637         ff_mpeg_unref_picture(s, &s->last_picture);
1638         if (s->last_picture_ptr->f->buf[0] &&
1639             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1640                                        s->last_picture_ptr)) < 0)
1641             return ret;
1642     }
1643     if (s->next_picture_ptr) {
1644         ff_mpeg_unref_picture(s, &s->next_picture);
1645         if (s->next_picture_ptr->f->buf[0] &&
1646             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1647                                        s->next_picture_ptr)) < 0)
1648             return ret;
1649     }
1650
1651     if (s->picture_structure!= PICT_FRAME) {
1652         int i;
1653         for (i = 0; i < 4; i++) {
1654             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1655                 s->current_picture.f->data[i] +=
1656                     s->current_picture.f->linesize[i];
1657             }
1658             s->current_picture.f->linesize[i] *= 2;
1659             s->last_picture.f->linesize[i]    *= 2;
1660             s->next_picture.f->linesize[i]    *= 2;
1661         }
1662     }
1663
1664     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1665         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1666         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1667     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1668         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1669         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1670     } else {
1671         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1672         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1673     }
1674
1675     if (s->dct_error_sum) {
1676         av_assert2(s->avctx->noise_reduction && s->encoding);
1677         update_noise_reduction(s);
1678     }
1679
1680     return 0;
1681 }
1682
1683 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1684                           const AVFrame *pic_arg, int *got_packet)
1685 {
1686     MpegEncContext *s = avctx->priv_data;
1687     int i, stuffing_count, ret;
1688     int context_count = s->slice_context_count;
1689
1690     s->picture_in_gop_number++;
1691
1692     if (load_input_picture(s, pic_arg) < 0)
1693         return -1;
1694
1695     if (select_input_picture(s) < 0) {
1696         return -1;
1697     }
1698
1699     /* output? */
1700     if (s->new_picture.f->data[0]) {
1701         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1702             return ret;
1703         if (s->mb_info) {
1704             s->mb_info_ptr = av_packet_new_side_data(pkt,
1705                                  AV_PKT_DATA_H263_MB_INFO,
1706                                  s->mb_width*s->mb_height*12);
1707             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1708         }
1709
1710         for (i = 0; i < context_count; i++) {
1711             int start_y = s->thread_context[i]->start_mb_y;
1712             int   end_y = s->thread_context[i]->  end_mb_y;
1713             int h       = s->mb_height;
1714             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1715             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1716
1717             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1718         }
1719
1720         s->pict_type = s->new_picture.f->pict_type;
1721         //emms_c();
1722         ret = frame_start(s);
1723         if (ret < 0)
1724             return ret;
1725 vbv_retry:
1726         if (encode_picture(s, s->picture_number) < 0)
1727             return -1;
1728
1729         avctx->header_bits = s->header_bits;
1730         avctx->mv_bits     = s->mv_bits;
1731         avctx->misc_bits   = s->misc_bits;
1732         avctx->i_tex_bits  = s->i_tex_bits;
1733         avctx->p_tex_bits  = s->p_tex_bits;
1734         avctx->i_count     = s->i_count;
1735         // FIXME f/b_count in avctx
1736         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1737         avctx->skip_count  = s->skip_count;
1738
1739         frame_end(s);
1740
1741         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1742             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1743
1744         if (avctx->rc_buffer_size) {
1745             RateControlContext *rcc = &s->rc_context;
1746             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1747
1748             if (put_bits_count(&s->pb) > max_size &&
1749                 s->lambda < s->avctx->lmax) {
1750                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1751                                        (s->qscale + 1) / s->qscale);
1752                 if (s->adaptive_quant) {
1753                     int i;
1754                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1755                         s->lambda_table[i] =
1756                             FFMAX(s->lambda_table[i] + 1,
1757                                   s->lambda_table[i] * (s->qscale + 1) /
1758                                   s->qscale);
1759                 }
1760                 s->mb_skipped = 0;        // done in frame_start()
1761                 // done in encode_picture() so we must undo it
1762                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1763                     if (s->flipflop_rounding          ||
1764                         s->codec_id == AV_CODEC_ID_H263P ||
1765                         s->codec_id == AV_CODEC_ID_MPEG4)
1766                         s->no_rounding ^= 1;
1767                 }
1768                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1769                     s->time_base       = s->last_time_base;
1770                     s->last_non_b_time = s->time - s->pp_time;
1771                 }
1772                 for (i = 0; i < context_count; i++) {
1773                     PutBitContext *pb = &s->thread_context[i]->pb;
1774                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1775                 }
1776                 goto vbv_retry;
1777             }
1778
1779             av_assert0(s->avctx->rc_max_rate);
1780         }
1781
1782         if (s->flags & CODEC_FLAG_PASS1)
1783             ff_write_pass1_stats(s);
1784
1785         for (i = 0; i < 4; i++) {
1786             s->current_picture_ptr->f->error[i] =
1787             s->current_picture.f->error[i] =
1788                 s->current_picture.error[i];
1789             avctx->error[i] += s->current_picture_ptr->f->error[i];
1790         }
1791
1792         if (s->flags & CODEC_FLAG_PASS1)
1793             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1794                    avctx->i_tex_bits + avctx->p_tex_bits ==
1795                        put_bits_count(&s->pb));
1796         flush_put_bits(&s->pb);
1797         s->frame_bits  = put_bits_count(&s->pb);
1798
1799         stuffing_count = ff_vbv_update(s, s->frame_bits);
1800         s->stuffing_bits = 8*stuffing_count;
1801         if (stuffing_count) {
1802             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1803                     stuffing_count + 50) {
1804                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1805                 return -1;
1806             }
1807
1808             switch (s->codec_id) {
1809             case AV_CODEC_ID_MPEG1VIDEO:
1810             case AV_CODEC_ID_MPEG2VIDEO:
1811                 while (stuffing_count--) {
1812                     put_bits(&s->pb, 8, 0);
1813                 }
1814             break;
1815             case AV_CODEC_ID_MPEG4:
1816                 put_bits(&s->pb, 16, 0);
1817                 put_bits(&s->pb, 16, 0x1C3);
1818                 stuffing_count -= 4;
1819                 while (stuffing_count--) {
1820                     put_bits(&s->pb, 8, 0xFF);
1821                 }
1822             break;
1823             default:
1824                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1825             }
1826             flush_put_bits(&s->pb);
1827             s->frame_bits  = put_bits_count(&s->pb);
1828         }
1829
1830         /* update mpeg1/2 vbv_delay for CBR */
1831         if (s->avctx->rc_max_rate                          &&
1832             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1833             s->out_format == FMT_MPEG1                     &&
1834             90000LL * (avctx->rc_buffer_size - 1) <=
1835                 s->avctx->rc_max_rate * 0xFFFFLL) {
1836             int vbv_delay, min_delay;
1837             double inbits  = s->avctx->rc_max_rate *
1838                              av_q2d(s->avctx->time_base);
1839             int    minbits = s->frame_bits - 8 *
1840                              (s->vbv_delay_ptr - s->pb.buf - 1);
1841             double bits    = s->rc_context.buffer_index + minbits - inbits;
1842
1843             if (bits < 0)
1844                 av_log(s->avctx, AV_LOG_ERROR,
1845                        "Internal error, negative bits\n");
1846
1847             assert(s->repeat_first_field == 0);
1848
1849             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1850             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1851                         s->avctx->rc_max_rate;
1852
1853             vbv_delay = FFMAX(vbv_delay, min_delay);
1854
1855             av_assert0(vbv_delay < 0xFFFF);
1856
1857             s->vbv_delay_ptr[0] &= 0xF8;
1858             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1859             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1860             s->vbv_delay_ptr[2] &= 0x07;
1861             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1862             avctx->vbv_delay     = vbv_delay * 300;
1863         }
1864         s->total_bits     += s->frame_bits;
1865         avctx->frame_bits  = s->frame_bits;
1866
1867         pkt->pts = s->current_picture.f->pts;
1868         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1869             if (!s->current_picture.f->coded_picture_number)
1870                 pkt->dts = pkt->pts - s->dts_delta;
1871             else
1872                 pkt->dts = s->reordered_pts;
1873             s->reordered_pts = pkt->pts;
1874         } else
1875             pkt->dts = pkt->pts;
1876         if (s->current_picture.f->key_frame)
1877             pkt->flags |= AV_PKT_FLAG_KEY;
1878         if (s->mb_info)
1879             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1880     } else {
1881         s->frame_bits = 0;
1882     }
1883
1884     /* release non-reference frames */
1885     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1886         if (!s->picture[i].reference)
1887             ff_mpeg_unref_picture(s, &s->picture[i]);
1888     }
1889
1890     av_assert1((s->frame_bits & 7) == 0);
1891
1892     pkt->size = s->frame_bits / 8;
1893     *got_packet = !!pkt->size;
1894     return 0;
1895 }
1896
1897 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1898                                                 int n, int threshold)
1899 {
1900     static const char tab[64] = {
1901         3, 2, 2, 1, 1, 1, 1, 1,
1902         1, 1, 1, 1, 1, 1, 1, 1,
1903         1, 1, 1, 1, 1, 1, 1, 1,
1904         0, 0, 0, 0, 0, 0, 0, 0,
1905         0, 0, 0, 0, 0, 0, 0, 0,
1906         0, 0, 0, 0, 0, 0, 0, 0,
1907         0, 0, 0, 0, 0, 0, 0, 0,
1908         0, 0, 0, 0, 0, 0, 0, 0
1909     };
1910     int score = 0;
1911     int run = 0;
1912     int i;
1913     int16_t *block = s->block[n];
1914     const int last_index = s->block_last_index[n];
1915     int skip_dc;
1916
1917     if (threshold < 0) {
1918         skip_dc = 0;
1919         threshold = -threshold;
1920     } else
1921         skip_dc = 1;
1922
1923     /* Are all we could set to zero already zero? */
1924     if (last_index <= skip_dc - 1)
1925         return;
1926
1927     for (i = 0; i <= last_index; i++) {
1928         const int j = s->intra_scantable.permutated[i];
1929         const int level = FFABS(block[j]);
1930         if (level == 1) {
1931             if (skip_dc && i == 0)
1932                 continue;
1933             score += tab[run];
1934             run = 0;
1935         } else if (level > 1) {
1936             return;
1937         } else {
1938             run++;
1939         }
1940     }
1941     if (score >= threshold)
1942         return;
1943     for (i = skip_dc; i <= last_index; i++) {
1944         const int j = s->intra_scantable.permutated[i];
1945         block[j] = 0;
1946     }
1947     if (block[0])
1948         s->block_last_index[n] = 0;
1949     else
1950         s->block_last_index[n] = -1;
1951 }
1952
1953 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1954                                int last_index)
1955 {
1956     int i;
1957     const int maxlevel = s->max_qcoeff;
1958     const int minlevel = s->min_qcoeff;
1959     int overflow = 0;
1960
1961     if (s->mb_intra) {
1962         i = 1; // skip clipping of intra dc
1963     } else
1964         i = 0;
1965
1966     for (; i <= last_index; i++) {
1967         const int j = s->intra_scantable.permutated[i];
1968         int level = block[j];
1969
1970         if (level > maxlevel) {
1971             level = maxlevel;
1972             overflow++;
1973         } else if (level < minlevel) {
1974             level = minlevel;
1975             overflow++;
1976         }
1977
1978         block[j] = level;
1979     }
1980
1981     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1982         av_log(s->avctx, AV_LOG_INFO,
1983                "warning, clipping %d dct coefficients to %d..%d\n",
1984                overflow, minlevel, maxlevel);
1985 }
1986
/**
 * Compute a weight for every pixel of an 8x8 block from its local activity.
 *
 * For each position the pixels of the surrounding 3x3 window, cropped to
 * the 8x8 block, are accumulated; the weight is
 * 36 * sqrt(count * sum_of_squares - sum * sum) / count.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two pixel rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int sum   = 0;
            int sqr   = 0;
            int count = 0;
            int nx, ny;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int v = ptr[nx + ny * stride];

                    sum += v;
                    sqr += v * v;
                    count++;
                }
            }
            weight[col + 8 * row] = (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
2010
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * Steps, in order: optional per-macroblock quantizer update, locating the
 * source pixels (through edge emulation when the macroblock crosses the
 * picture border), filling s->block[] either with the source pixels (intra)
 * or with the difference to the motion-compensated prediction (inter),
 * optional frame/field DCT decision, DCT + quantization with optional
 * noise shaping and coefficient elimination, and finally the
 * codec-specific macroblock writer selected by s->codec_id.
 *
 * @param s               encoder context
 * @param motion_x        passed on unchanged to the codec-specific writer
 * @param motion_y        passed on unchanged to the codec-specific writer
 * @param mb_block_height height used to locate the chroma part of the
 *                        macroblock
 * @param mb_block_width  width used to locate the chroma part of the
 *                        macroblock
 * @param mb_block_count  number of blocks in the macroblock; the local
 *                        arrays hold at most 12
 */
2011 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2012                                                 int motion_x, int motion_y,
2013                                                 int mb_block_height,
2014                                                 int mb_block_width,
2015                                                 int mb_block_count)
2016 {
2017     int16_t weight[12][64];
2018     int16_t orig[12][64];
2019     const int mb_x = s->mb_x;
2020     const int mb_y = s->mb_y;
2021     int i;
2022     int skip_dct[12];
2023     int dct_offset = s->linesize * 8; // default for progressive frames
2024     int uv_dct_offset = s->uvlinesize * 8;
2025     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2026     ptrdiff_t wrap_y, wrap_c;
2027
/* every block starts out with the global skipdct setting */
2028     for (i = 0; i < mb_block_count; i++)
2029         skip_dct[i] = s->skipdct;
2030
/* per-macroblock quantizer selection */
2031     if (s->adaptive_quant) {
2032         const int last_qp = s->qscale;
2033         const int mb_xy = mb_x + mb_y * s->mb_stride;
2034
2035         s->lambda = s->lambda_table[mb_xy];
2036         update_qscale(s);
2037
2038         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2039             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2040             s->dquant = s->qscale - last_qp;
2041
/* H.263-family output: the quantizer change is limited to -2..2, and
 * MPEG-4 forces it to 0 for some inter macroblock types */
2042             if (s->out_format == FMT_H263) {
2043                 s->dquant = av_clip(s->dquant, -2, 2);
2044
2045                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2046                     if (!s->mb_intra) {
2047                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2048                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2049                                 s->dquant = 0;
2050                         }
2051                         if (s->mv_type == MV_TYPE_8X8)
2052                             s->dquant = 0;
2053                     }
2054                 }
2055             }
2056         }
2057         ff_set_qscale(s, last_qp + s->dquant);
2058     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2059         ff_set_qscale(s, s->qscale + s->dquant);
2060
/* locate this macroblock's pixels in the picture being coded */
2061     wrap_y = s->linesize;
2062     wrap_c = s->uvlinesize;
2063     ptr_y  = s->new_picture.f->data[0] +
2064              (mb_y * 16 * wrap_y)              + mb_x * 16;
2065     ptr_cb = s->new_picture.f->data[1] +
2066              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2067     ptr_cr = s->new_picture.f->data[2] +
2068              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2069
/* the macroblock extends past the right or bottom picture border (not
 * done for AMV): read luma and both chroma planes through
 * emulated_edge_mc() into edge_emu_buffer and use those copies instead */
2070     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2071         uint8_t *ebuf = s->edge_emu_buffer + 36 * wrap_y;
2072         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2073         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2074         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2075                                  wrap_y, wrap_y,
2076                                  16, 16, mb_x * 16, mb_y * 16,
2077                                  s->width, s->height);
2078         ptr_y = ebuf;
2079         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2080                                  wrap_c, wrap_c,
2081                                  mb_block_width, mb_block_height,
2082                                  mb_x * mb_block_width, mb_y * mb_block_height,
2083                                  cw, ch);
2084         ptr_cb = ebuf + 16 * wrap_y;
2085         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2086                                  wrap_c, wrap_c,
2087                                  mb_block_width, mb_block_height,
2088                                  mb_x * mb_block_width, mb_y * mb_block_height,
2089                                  cw, ch);
2090         ptr_cr = ebuf + 16 * wrap_y + 16;
2091     }
2092
/* intra macroblock: the blocks are filled with the source pixels */
2093     if (s->mb_intra) {
2094         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2095             int progressive_score, interlaced_score;
2096
2097             s->interlaced_dct = 0;
/* score with consecutive lines (top and bottom half), biased by -400 */
2098             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2099                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2100                                                      NULL, wrap_y, 8) - 400;
2101
2102             if (progressive_score > 0) {
/* score with every second line (even lines, then odd lines) */
2103                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2104                                                         NULL, wrap_y * 2, 8) +
2105                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2106                                                         NULL, wrap_y * 2, 8);
2107                 if (progressive_score > interlaced_score) {
2108                     s->interlaced_dct = 1;
2109
/* interlaced DCT: the lower blocks start one line down and all blocks
 * are read with a doubled line stride */
2110                     dct_offset = wrap_y;
2111                     uv_dct_offset = wrap_c;
2112                     wrap_y <<= 1;
2113                     if (s->chroma_format == CHROMA_422 ||
2114                         s->chroma_format == CHROMA_444)
2115                         wrap_c <<= 1;
2116                 }
2117             }
2118         }
2119
/* four luma blocks: left/right, upper/lower */
2120         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2121         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2122         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2123         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2124
/* gray mode skips the DCT of blocks 4 and 5; otherwise fetch the chroma
 * blocks, with extra blocks when chroma is not vertically subsampled */
2125         if (s->flags & CODEC_FLAG_GRAY) {
2126             skip_dct[4] = 1;
2127             skip_dct[5] = 1;
2128         } else {
2129             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2130             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2131             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2132                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2133                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2134             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2135                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2136                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2137                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2138                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2139                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2140                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2141             }
2142         }
2143     } else {
/* inter macroblock: build the prediction in s->dest[] and fill the
 * blocks with the difference between source and prediction */
2144         op_pixels_func (*op_pix)[4];
2145         qpel_mc_func (*op_qpix)[16];
2146         uint8_t *dest_y, *dest_cb, *dest_cr;
2147
2148         dest_y  = s->dest[0];
2149         dest_cb = s->dest[1];
2150         dest_cr = s->dest[2];
2151
/* put tables with or without rounding; B pictures always use the
 * rounding tables */
2152         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2153             op_pix  = s->hdsp.put_pixels_tab;
2154             op_qpix = s->qdsp.put_qpel_pixels_tab;
2155         } else {
2156             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2157             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2158         }
2159
/* after a forward prediction the avg tables are selected for a
 * following backward prediction */
2160         if (s->mv_dir & MV_DIR_FORWARD) {
2161             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
2162                           s->last_picture.f->data,
2163                           op_pix, op_qpix);
2164             op_pix  = s->hdsp.avg_pixels_tab;
2165             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2166         }
2167         if (s->mv_dir & MV_DIR_BACKWARD) {
2168             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
2169                           s->next_picture.f->data,
2170                           op_pix, op_qpix);
2171         }
2172
2173         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2174             int progressive_score, interlaced_score;
2175
2176             s->interlaced_dct = 0;
/* same frame/field decision as in the intra case, but comparing the
 * prediction against the source */
2177             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2178                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2179                                                      ptr_y + wrap_y * 8,
2180                                                      wrap_y, 8) - 400;
2181
2182             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2183                 progressive_score -= 400;
2184
2185             if (progressive_score > 0) {
2186                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2187                                                         wrap_y * 2, 8) +
2188                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2189                                                         ptr_y + wrap_y,
2190                                                         wrap_y * 2, 8);
2191
2192                 if (progressive_score > interlaced_score) {
2193                     s->interlaced_dct = 1;
2194
2195                     dct_offset = wrap_y;
2196                     uv_dct_offset = wrap_c;
2197                     wrap_y <<= 1;
2198                     if (s->chroma_format == CHROMA_422)
2199                         wrap_c <<= 1;
2200                 }
2201             }
2202         }
2203
/* residual of the four luma blocks */
2204         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2205         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2206         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2207                             dest_y + dct_offset, wrap_y);
2208         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2209                             dest_y + dct_offset + 8, wrap_y);
2210
2211         if (s->flags & CODEC_FLAG_GRAY) {
2212             skip_dct[4] = 1;
2213             skip_dct[5] = 1;
2214         } else {
2215             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2216             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2217             if (!s->chroma_y_shift) { /* 422 */
2218                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2219                                     dest_cb + uv_dct_offset, wrap_c);
2220                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2221                                     dest_cr + uv_dct_offset, wrap_c);
2222             }
2223         }
2224         /* pre quantization */
/* when mc_mb_var of this macroblock is below 2 * qscale^2, skip the DCT
 * of every block whose SAD against the prediction is below 20 * qscale */
2225         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2226                 2 * s->qscale * s->qscale) {
2227             // FIXME optimize
2228             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2229                 skip_dct[0] = 1;
2230             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2231                 skip_dct[1] = 1;
2232             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2233                                wrap_y, 8) < 20 * s->qscale)
2234                 skip_dct[2] = 1;
2235             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2236                                wrap_y, 8) < 20 * s->qscale)
2237                 skip_dct[3] = 1;
2238             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2239                 skip_dct[4] = 1;
2240             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2241                 skip_dct[5] = 1;
2242             if (!s->chroma_y_shift) { /* 422 */
2243                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2244                                    dest_cb + uv_dct_offset,
2245                                    wrap_c, 8) < 20 * s->qscale)
2246                     skip_dct[6] = 1;
2247                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2248                                    dest_cr + uv_dct_offset,
2249                                    wrap_c, 8) < 20 * s->qscale)
2250                     skip_dct[7] = 1;
2251             }
2252         }
2253     }
2254
/* noise shaping: compute a visual weight from the source pixels of each
 * block that will be transformed, and keep a copy of the untransformed
 * blocks in orig[] for dct_quantize_refine() below */
2255     if (s->quantizer_noise_shaping) {
2256         if (!skip_dct[0])
2257             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2258         if (!skip_dct[1])
2259             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2260         if (!skip_dct[2])
2261             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2262         if (!skip_dct[3])
2263             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2264         if (!skip_dct[4])
2265             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2266         if (!skip_dct[5])
2267             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2268         if (!s->chroma_y_shift) { /* 422 */
2269             if (!skip_dct[6])
2270                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2271                                   wrap_c);
2272             if (!skip_dct[7])
2273                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2274                                   wrap_c);
2275         }
2276         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2277     }
2278
2279     /* DCT & quantize */
2280     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2281     {
/* skipped blocks are marked as empty with a last index of -1 */
2282         for (i = 0; i < mb_block_count; i++) {
2283             if (!skip_dct[i]) {
2284                 int overflow;
2285                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2286                 // FIXME we could decide to change to quantizer instead of
2287                 // clipping
2288                 // JS: I don't think that would be a good idea it could lower
2289                 //     quality instead of improve it. Just INTRADC clipping
2290                 //     deserves changes in quantizer
2291                 if (overflow)
2292                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2293             } else
2294                 s->block_last_index[i] = -1;
2295         }
2296         if (s->quantizer_noise_shaping) {
2297             for (i = 0; i < mb_block_count; i++) {
2298                 if (!skip_dct[i]) {
2299                     s->block_last_index[i] =
2300                         dct_quantize_refine(s, s->block[i], weight[i],
2301                                             orig[i], i, s->qscale);
2302                 }
2303             }
2304         }
2305
/* inter macroblocks only: run single coefficient elimination on the luma
 * blocks (0..3) and on the remaining blocks with their own thresholds */
2306         if (s->luma_elim_threshold && !s->mb_intra)
2307             for (i = 0; i < 4; i++)
2308                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2309         if (s->chroma_elim_threshold && !s->mb_intra)
2310             for (i = 4; i < mb_block_count; i++)
2311                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2312
/* CBP_RD: empty blocks get a fixed coded_score of INT_MAX / 256 */
2313         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2314             for (i = 0; i < mb_block_count; i++) {
2315                 if (s->block_last_index[i] == -1)
2316                     s->coded_score[i] = INT_MAX / 256;
2317             }
2318         }
2319     }
2320
/* gray intra macroblock: blocks 4 and 5 (and 6..11 without vertical
 * chroma subsampling) carry only a first coefficient derived from 1024
 * and c_dc_scale */
2321     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2322         s->block_last_index[4] =
2323         s->block_last_index[5] = 0;
2324         s->block[4][0] =
2325         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2326         if (!s->chroma_y_shift) { /* 422 / 444 */
2327             for (i=6; i<12; i++) {
2328                 s->block_last_index[i] = 0;
2329                 s->block[i][0] = s->block[4][0];
2330             }
2331         }
2332     }
2333
2334     // non c quantize code returns incorrect block_last_index FIXME
/* with alternate scan and a non-C quantizer, recompute the last index by
 * searching backwards for the last nonzero coefficient in scan order */
2335     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2336         for (i = 0; i < mb_block_count; i++) {
2337             int j;
2338             if (s->block_last_index[i] > 0) {
2339                 for (j = 63; j > 0; j--) {
2340                     if (s->block[i][s->intra_scantable.permutated[j]])
2341                         break;
2342                 }
2343                 s->block_last_index[i] = j;
2344             }
2345         }
2346     }
2347
2348     /* huffman encode */
/* dispatch to the macroblock writer of the active codec; each call is
 * guarded by the matching CONFIG_*_ENCODER build option */
2349     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2350     case AV_CODEC_ID_MPEG1VIDEO:
2351     case AV_CODEC_ID_MPEG2VIDEO:
2352         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2353             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2354         break;
2355     case AV_CODEC_ID_MPEG4:
2356         if (CONFIG_MPEG4_ENCODER)
2357             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2358         break;
2359     case AV_CODEC_ID_MSMPEG4V2:
2360     case AV_CODEC_ID_MSMPEG4V3:
2361     case AV_CODEC_ID_WMV1:
2362         if (CONFIG_MSMPEG4_ENCODER)
2363             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2364         break;
2365     case AV_CODEC_ID_WMV2:
2366         if (CONFIG_WMV2_ENCODER)
2367             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2368         break;
2369     case AV_CODEC_ID_H261:
2370         if (CONFIG_H261_ENCODER)
2371             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2372         break;
2373     case AV_CODEC_ID_H263:
2374     case AV_CODEC_ID_H263P:
2375     case AV_CODEC_ID_FLV1:
2376     case AV_CODEC_ID_RV10:
2377     case AV_CODEC_ID_RV20:
2378         if (CONFIG_H263_ENCODER)
2379             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2380         break;
2381     case AV_CODEC_ID_MJPEG:
2382     case AV_CODEC_ID_AMV:
2383         if (CONFIG_MJPEG_ENCODER)
2384             ff_mjpeg_encode_mb(s, s->block);
2385         break;
2386     default:
2387         av_assert1(0);
2388     }
2389 }
2390
2391 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2392 {
2393     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2394     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2395     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2396 }
2397
2398 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2399     int i;
2400
2401     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2402
2403     /* mpeg1 */
2404     d->mb_skip_run= s->mb_skip_run;
2405     for(i=0; i<3; i++)
2406         d->last_dc[i] = s->last_dc[i];
2407
2408     /* statistics */
2409     d->mv_bits= s->mv_bits;
2410     d->i_tex_bits= s->i_tex_bits;
2411     d->p_tex_bits= s->p_tex_bits;
2412     d->i_count= s->i_count;
2413     d->f_count= s->f_count;
2414     d->b_count= s->b_count;
2415     d->skip_count= s->skip_count;
2416     d->misc_bits= s->misc_bits;
2417     d->last_bits= 0;
2418
2419     d->mb_skipped= 0;
2420     d->qscale= s->qscale;
2421     d->dquant= s->dquant;
2422
2423     d->esc3_level_length= s->esc3_level_length;
2424 }
2425
2426 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2427     int i;
2428
2429     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2430     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2431
2432     /* mpeg1 */
2433     d->mb_skip_run= s->mb_skip_run;
2434     for(i=0; i<3; i++)
2435         d->last_dc[i] = s->last_dc[i];
2436
2437     /* statistics */
2438     d->mv_bits= s->mv_bits;
2439     d->i_tex_bits= s->i_tex_bits;
2440     d->p_tex_bits= s->p_tex_bits;
2441     d->i_count= s->i_count;
2442     d->f_count= s->f_count;
2443     d->b_count= s->b_count;
2444     d->skip_count= s->skip_count;
2445     d->misc_bits= s->misc_bits;
2446
2447     d->mb_intra= s->mb_intra;
2448     d->mb_skipped= s->mb_skipped;
2449     d->mv_type= s->mv_type;
2450     d->mv_dir= s->mv_dir;
2451     d->pb= s->pb;
2452     if(s->data_partitioning){
2453         d->pb2= s->pb2;
2454         d->tex_pb= s->tex_pb;
2455     }
2456     d->block= s->block;
2457     for(i=0; i<8; i++)
2458         d->block_last_index[i]= s->block_last_index[i];
2459     d->interlaced_dct= s->interlaced_dct;
2460     d->qscale= s->qscale;
2461
2462     d->esc3_level_length= s->esc3_level_length;
2463 }
2464
2465 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2466                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2467                            int *dmin, int *next_block, int motion_x, int motion_y)
2468 {
2469     int score;
2470     uint8_t *dest_backup[3];
2471
2472     copy_context_before_encode(s, backup, type);
2473
2474     s->block= s->blocks[*next_block];
2475     s->pb= pb[*next_block];
2476     if(s->data_partitioning){
2477         s->pb2   = pb2   [*next_block];
2478         s->tex_pb= tex_pb[*next_block];
2479     }
2480
2481     if(*next_block){
2482         memcpy(dest_backup, s->dest, sizeof(s->dest));
2483         s->dest[0] = s->rd_scratchpad;
2484         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2485         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2486         av_assert0(s->linesize >= 32); //FIXME
2487     }
2488
2489     encode_mb(s, motion_x, motion_y);
2490
2491     score= put_bits_count(&s->pb);
2492     if(s->data_partitioning){
2493         score+= put_bits_count(&s->pb2);
2494         score+= put_bits_count(&s->tex_pb);
2495     }
2496
2497     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2498         ff_MPV_decode_mb(s, s->block);
2499
2500         score *= s->lambda2;
2501         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2502     }
2503
2504     if(*next_block){
2505         memcpy(s->dest, dest_backup, sizeof(s->dest));
2506     }
2507
2508     if(score<*dmin){
2509         *dmin= score;
2510         *next_block^=1;
2511
2512         copy_context_after_encode(best, s, type);
2513     }
2514 }
2515
2516 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2517     uint32_t *sq = ff_square_tab + 256;
2518     int acc=0;
2519     int x,y;
2520
2521     if(w==16 && h==16)
2522         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2523     else if(w==8 && h==8)
2524         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2525
2526     for(y=0; y<h; y++){
2527         for(x=0; x<w; x++){
2528             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2529         }
2530     }
2531
2532     av_assert2(acc>=0);
2533
2534     return acc;
2535 }
2536
2537 static int sse_mb(MpegEncContext *s){
2538     int w= 16;
2539     int h= 16;
2540
2541     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2542     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2543
2544     if(w==16 && h==16)
2545       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2546         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2547                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2548                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2549       }else{
2550         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2551                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2552                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2553       }
2554     else
2555         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2556                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2557                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2558 }
2559
2560 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2561     MpegEncContext *s= *(void**)arg;
2562
2563
2564     s->me.pre_pass=1;
2565     s->me.dia_size= s->avctx->pre_dia_size;
2566     s->first_slice_line=1;
2567     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2568         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2569             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2570         }
2571         s->first_slice_line=0;
2572     }
2573
2574     s->me.pre_pass=0;
2575
2576     return 0;
2577 }
2578
2579 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2580     MpegEncContext *s= *(void**)arg;
2581
2582     ff_check_alignment();
2583
2584     s->me.dia_size= s->avctx->dia_size;
2585     s->first_slice_line=1;
2586     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2587         s->mb_x=0; //for block init below
2588         ff_init_block_index(s);
2589         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2590             s->block_index[0]+=2;
2591             s->block_index[1]+=2;
2592             s->block_index[2]+=2;
2593             s->block_index[3]+=2;
2594
2595             /* compute motion vector & mb_type and store in context */
2596             if(s->pict_type==AV_PICTURE_TYPE_B)
2597                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2598             else
2599                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2600         }
2601         s->first_slice_line=0;
2602     }
2603     return 0;
2604 }
2605
2606 static int mb_var_thread(AVCodecContext *c, void *arg){
2607     MpegEncContext *s= *(void**)arg;
2608     int mb_x, mb_y;
2609
2610     ff_check_alignment();
2611
2612     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2613         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2614             int xx = mb_x * 16;
2615             int yy = mb_y * 16;
2616             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2617             int varc;
2618             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2619
2620             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2621                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2622
2623             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2624             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2625             s->me.mb_var_sum_temp    += varc;
2626         }
2627     }
2628     return 0;
2629 }
2630
2631 static void write_slice_end(MpegEncContext *s){
2632     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2633         if(s->partitioned_frame){
2634             ff_mpeg4_merge_partitions(s);
2635         }
2636
2637         ff_mpeg4_stuffing(&s->pb);
2638     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2639         ff_mjpeg_encode_stuffing(s);
2640     }
2641
2642     avpriv_align_put_bits(&s->pb);
2643     flush_put_bits(&s->pb);
2644
2645     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2646         s->misc_bits+= get_bits_diff(s);
2647 }
2648
2649 static void write_mb_info(MpegEncContext *s)
2650 {
2651     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2652     int offset = put_bits_count(&s->pb);
2653     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2654     int gobn = s->mb_y / s->gob_index;
2655     int pred_x, pred_y;
2656     if (CONFIG_H263_ENCODER)
2657         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2658     bytestream_put_le32(&ptr, offset);
2659     bytestream_put_byte(&ptr, s->qscale);
2660     bytestream_put_byte(&ptr, gobn);
2661     bytestream_put_le16(&ptr, mba);
2662     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2663     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2664     /* 4MV not implemented */
2665     bytestream_put_byte(&ptr, 0); /* hmv2 */
2666     bytestream_put_byte(&ptr, 0); /* vmv2 */
2667 }
2668
2669 static void update_mb_info(MpegEncContext *s, int startcode)
2670 {
2671     if (!s->mb_info)
2672         return;
2673     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2674         s->mb_info_size += 12;
2675         s->prev_mb_info = s->last_mb_info;
2676     }
2677     if (startcode) {
2678         s->prev_mb_info = put_bits_count(&s->pb)/8;
2679         /* This might have incremented mb_info_size above, and we return without
2680          * actually writing any info into that slot yet. But in that case,
2681          * this will be called again at the start of the after writing the
2682          * start code, actually writing the mb info. */
2683         return;
2684     }
2685
2686     s->last_mb_info = put_bits_count(&s->pb)/8;
2687     if (!s->mb_info_size)
2688         s->mb_info_size += 12;
2689     write_mb_info(s);
2690 }
2691
2692 static int encode_thread(AVCodecContext *c, void *arg){
2693     MpegEncContext *s= *(void**)arg;
2694     int mb_x, mb_y, pdif = 0;
2695     int chr_h= 16>>s->chroma_y_shift;
2696     int i, j;
2697     MpegEncContext best_s, backup_s;
2698     uint8_t bit_buf[2][MAX_MB_BYTES];
2699     uint8_t bit_buf2[2][MAX_MB_BYTES];
2700     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2701     PutBitContext pb[2], pb2[2], tex_pb[2];
2702
2703     ff_check_alignment();
2704
2705     for(i=0; i<2; i++){
2706         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2707         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2708         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2709     }
2710
2711     s->last_bits= put_bits_count(&s->pb);
2712     s->mv_bits=0;
2713     s->misc_bits=0;
2714     s->i_tex_bits=0;
2715     s->p_tex_bits=0;
2716     s->i_count=0;
2717     s->f_count=0;
2718     s->b_count=0;
2719     s->skip_count=0;
2720
2721     for(i=0; i<3; i++){
2722         /* init last dc values */
2723         /* note: quant matrix value (8) is implied here */
2724         s->last_dc[i] = 128 << s->intra_dc_precision;
2725
2726         s->current_picture.error[i] = 0;
2727     }
2728     if(s->codec_id==AV_CODEC_ID_AMV){
2729         s->last_dc[0] = 128*8/13;
2730         s->last_dc[1] = 128*8/14;
2731         s->last_dc[2] = 128*8/14;
2732     }
2733     s->mb_skip_run = 0;
2734     memset(s->last_mv, 0, sizeof(s->last_mv));
2735
2736     s->last_mv_dir = 0;
2737
2738     switch(s->codec_id){
2739     case AV_CODEC_ID_H263:
2740     case AV_CODEC_ID_H263P:
2741     case AV_CODEC_ID_FLV1:
2742         if (CONFIG_H263_ENCODER)
2743             s->gob_index = ff_h263_get_gob_height(s);
2744         break;
2745     case AV_CODEC_ID_MPEG4:
2746         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2747             ff_mpeg4_init_partitions(s);
2748         break;
2749     }
2750
2751     s->resync_mb_x=0;
2752     s->resync_mb_y=0;
2753     s->first_slice_line = 1;
2754     s->ptr_lastgob = s->pb.buf;
2755     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2756         s->mb_x=0;
2757         s->mb_y= mb_y;
2758
2759         ff_set_qscale(s, s->qscale);
2760         ff_init_block_index(s);
2761
2762         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2763             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2764             int mb_type= s->mb_type[xy];
2765 //            int d;
2766             int dmin= INT_MAX;
2767             int dir;
2768
2769             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2770                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2771                 return -1;
2772             }
2773             if(s->data_partitioning){
2774                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2775                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2776                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2777                     return -1;
2778                 }
2779             }
2780
2781             s->mb_x = mb_x;
2782             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2783             ff_update_block_index(s);
2784
2785             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2786                 ff_h261_reorder_mb_index(s);
2787                 xy= s->mb_y*s->mb_stride + s->mb_x;
2788                 mb_type= s->mb_type[xy];
2789             }
2790
2791             /* write gob / video packet header  */
2792             if(s->rtp_mode){
2793                 int current_packet_size, is_gob_start;
2794
2795                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2796
2797                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2798
2799                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2800
2801                 switch(s->codec_id){
2802                 case AV_CODEC_ID_H263:
2803                 case AV_CODEC_ID_H263P:
2804                     if(!s->h263_slice_structured)
2805                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2806                     break;
2807                 case AV_CODEC_ID_MPEG2VIDEO:
2808                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2809                 case AV_CODEC_ID_MPEG1VIDEO:
2810                     if(s->mb_skip_run) is_gob_start=0;
2811                     break;
2812                 case AV_CODEC_ID_MJPEG:
2813                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2814                     break;
2815                 }
2816
2817                 if(is_gob_start){
2818                     if(s->start_mb_y != mb_y || mb_x!=0){
2819                         write_slice_end(s);
2820
2821                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2822                             ff_mpeg4_init_partitions(s);
2823                         }
2824                     }
2825
2826                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2827                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2828
2829                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2830                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2831                         int d = 100 / s->error_rate;
2832                         if(r % d == 0){
2833                             current_packet_size=0;
2834                             s->pb.buf_ptr= s->ptr_lastgob;
2835                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2836                         }
2837                     }
2838
2839                     if (s->avctx->rtp_callback){
2840                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2841                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2842                     }
2843                     update_mb_info(s, 1);
2844
2845                     switch(s->codec_id){
2846                     case AV_CODEC_ID_MPEG4:
2847                         if (CONFIG_MPEG4_ENCODER) {
2848                             ff_mpeg4_encode_video_packet_header(s);
2849                             ff_mpeg4_clean_buffers(s);
2850                         }
2851                     break;
2852                     case AV_CODEC_ID_MPEG1VIDEO:
2853                     case AV_CODEC_ID_MPEG2VIDEO:
2854                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2855                             ff_mpeg1_encode_slice_header(s);
2856                             ff_mpeg1_clean_buffers(s);
2857                         }
2858                     break;
2859                     case AV_CODEC_ID_H263:
2860                     case AV_CODEC_ID_H263P:
2861                         if (CONFIG_H263_ENCODER)
2862                             ff_h263_encode_gob_header(s, mb_y);
2863                     break;
2864                     }
2865
2866                     if(s->flags&CODEC_FLAG_PASS1){
2867                         int bits= put_bits_count(&s->pb);
2868                         s->misc_bits+= bits - s->last_bits;
2869                         s->last_bits= bits;
2870                     }
2871
2872                     s->ptr_lastgob += current_packet_size;
2873                     s->first_slice_line=1;
2874                     s->resync_mb_x=mb_x;
2875                     s->resync_mb_y=mb_y;
2876                 }
2877             }
2878
2879             if(  (s->resync_mb_x   == s->mb_x)
2880                && s->resync_mb_y+1 == s->mb_y){
2881                 s->first_slice_line=0;
2882             }
2883
2884             s->mb_skipped=0;
2885             s->dquant=0; //only for QP_RD
2886
2887             update_mb_info(s, 0);
2888
2889             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2890                 int next_block=0;
2891                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2892
2893                 copy_context_before_encode(&backup_s, s, -1);
2894                 backup_s.pb= s->pb;
2895                 best_s.data_partitioning= s->data_partitioning;
2896                 best_s.partitioned_frame= s->partitioned_frame;
2897                 if(s->data_partitioning){
2898                     backup_s.pb2= s->pb2;
2899                     backup_s.tex_pb= s->tex_pb;
2900                 }
2901
2902                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2903                     s->mv_dir = MV_DIR_FORWARD;
2904                     s->mv_type = MV_TYPE_16X16;
2905                     s->mb_intra= 0;
2906                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2907                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2908                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2909                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2910                 }
2911                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2912                     s->mv_dir = MV_DIR_FORWARD;
2913                     s->mv_type = MV_TYPE_FIELD;
2914                     s->mb_intra= 0;
2915                     for(i=0; i<2; i++){
2916                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2917                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2918                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2919                     }
2920                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2921                                  &dmin, &next_block, 0, 0);
2922                 }
2923                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2924                     s->mv_dir = MV_DIR_FORWARD;
2925                     s->mv_type = MV_TYPE_16X16;
2926                     s->mb_intra= 0;
2927                     s->mv[0][0][0] = 0;
2928                     s->mv[0][0][1] = 0;
2929                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2930                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2931                 }
2932                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2933                     s->mv_dir = MV_DIR_FORWARD;
2934                     s->mv_type = MV_TYPE_8X8;
2935                     s->mb_intra= 0;
2936                     for(i=0; i<4; i++){
2937                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2938                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2939                     }
2940                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2941                                  &dmin, &next_block, 0, 0);
2942                 }
2943                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2944                     s->mv_dir = MV_DIR_FORWARD;
2945                     s->mv_type = MV_TYPE_16X16;
2946                     s->mb_intra= 0;
2947                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2948                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2949                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2950                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2951                 }
2952                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2953                     s->mv_dir = MV_DIR_BACKWARD;
2954                     s->mv_type = MV_TYPE_16X16;
2955                     s->mb_intra= 0;
2956                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2957                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2958                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2959                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2960                 }
2961                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2962                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2963                     s->mv_type = MV_TYPE_16X16;
2964                     s->mb_intra= 0;
2965                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2966                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2967                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2968                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2969                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2970                                  &dmin, &next_block, 0, 0);
2971                 }
2972                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2973                     s->mv_dir = MV_DIR_FORWARD;
2974                     s->mv_type = MV_TYPE_FIELD;
2975                     s->mb_intra= 0;
2976                     for(i=0; i<2; i++){
2977                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2978                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2979                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2980                     }
2981                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2982                                  &dmin, &next_block, 0, 0);
2983                 }
2984                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2985                     s->mv_dir = MV_DIR_BACKWARD;
2986                     s->mv_type = MV_TYPE_FIELD;
2987                     s->mb_intra= 0;
2988                     for(i=0; i<2; i++){
2989                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2990                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2991                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2992                     }
2993                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2994                                  &dmin, &next_block, 0, 0);
2995                 }
2996                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2997                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2998                     s->mv_type = MV_TYPE_FIELD;
2999                     s->mb_intra= 0;
3000                     for(dir=0; dir<2; dir++){
3001                         for(i=0; i<2; i++){
3002                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3003                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3004                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3005                         }
3006                     }
3007                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3008                                  &dmin, &next_block, 0, 0);
3009                 }
3010                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3011                     s->mv_dir = 0;
3012                     s->mv_type = MV_TYPE_16X16;
3013                     s->mb_intra= 1;
3014                     s->mv[0][0][0] = 0;
3015                     s->mv[0][0][1] = 0;
3016                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3017                                  &dmin, &next_block, 0, 0);
3018                     if(s->h263_pred || s->h263_aic){
3019                         if(best_s.mb_intra)
3020                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3021                         else
3022                             ff_clean_intra_table_entries(s); //old mode?
3023                     }
3024                 }
3025
3026                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3027                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3028                         const int last_qp= backup_s.qscale;
3029                         int qpi, qp, dc[6];
3030                         int16_t ac[6][16];
3031                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3032                         static const int dquant_tab[4]={-1,1,-2,2};
3033                         int storecoefs = s->mb_intra && s->dc_val[0];
3034
3035                         av_assert2(backup_s.dquant == 0);
3036
3037                         //FIXME intra
3038                         s->mv_dir= best_s.mv_dir;
3039                         s->mv_type = MV_TYPE_16X16;
3040                         s->mb_intra= best_s.mb_intra;
3041                         s->mv[0][0][0] = best_s.mv[0][0][0];
3042                         s->mv[0][0][1] = best_s.mv[0][0][1];
3043                         s->mv[1][0][0] = best_s.mv[1][0][0];
3044                         s->mv[1][0][1] = best_s.mv[1][0][1];
3045
3046                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3047                         for(; qpi<4; qpi++){
3048                             int dquant= dquant_tab[qpi];
3049                             qp= last_qp + dquant;
3050                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3051                                 continue;
3052                             backup_s.dquant= dquant;
3053                             if(storecoefs){
3054                                 for(i=0; i<6; i++){
3055                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3056                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3057                                 }
3058                             }
3059
3060                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3061                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3062                             if(best_s.qscale != qp){
3063                                 if(storecoefs){
3064                                     for(i=0; i<6; i++){
3065                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3066                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3067                                     }
3068                                 }
3069                             }
3070                         }
3071                     }
3072                 }
3073                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3074                     int mx= s->b_direct_mv_table[xy][0];
3075                     int my= s->b_direct_mv_table[xy][1];
3076
3077                     backup_s.dquant = 0;
3078                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3079                     s->mb_intra= 0;
3080                     ff_mpeg4_set_direct_mv(s, mx, my);
3081                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3082                                  &dmin, &next_block, mx, my);
3083                 }
3084                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3085                     backup_s.dquant = 0;
3086                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3087                     s->mb_intra= 0;
3088                     ff_mpeg4_set_direct_mv(s, 0, 0);
3089                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3090                                  &dmin, &next_block, 0, 0);
3091                 }
3092                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3093                     int coded=0;
3094                     for(i=0; i<6; i++)
3095                         coded |= s->block_last_index[i];
3096                     if(coded){
3097                         int mx,my;
3098                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3099                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3100                             mx=my=0; //FIXME find the one we actually used
3101                             ff_mpeg4_set_direct_mv(s, mx, my);
3102                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3103                             mx= s->mv[1][0][0];
3104                             my= s->mv[1][0][1];
3105                         }else{
3106                             mx= s->mv[0][0][0];
3107                             my= s->mv[0][0][1];
3108                         }
3109
3110                         s->mv_dir= best_s.mv_dir;
3111                         s->mv_type = best_s.mv_type;
3112                         s->mb_intra= 0;
3113 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3114                         s->mv[0][0][1] = best_s.mv[0][0][1];
3115                         s->mv[1][0][0] = best_s.mv[1][0][0];
3116                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3117                         backup_s.dquant= 0;
3118                         s->skipdct=1;
3119                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3120                                         &dmin, &next_block, mx, my);
3121                         s->skipdct=0;
3122                     }
3123                 }
3124
3125                 s->current_picture.qscale_table[xy] = best_s.qscale;
3126
3127                 copy_context_after_encode(s, &best_s, -1);
3128
3129                 pb_bits_count= put_bits_count(&s->pb);
3130                 flush_put_bits(&s->pb);
3131                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3132                 s->pb= backup_s.pb;
3133
3134                 if(s->data_partitioning){
3135                     pb2_bits_count= put_bits_count(&s->pb2);
3136                     flush_put_bits(&s->pb2);
3137                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3138                     s->pb2= backup_s.pb2;
3139
3140                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3141                     flush_put_bits(&s->tex_pb);
3142                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3143                     s->tex_pb= backup_s.tex_pb;
3144                 }
3145                 s->last_bits= put_bits_count(&s->pb);
3146
3147                 if (CONFIG_H263_ENCODER &&
3148                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3149                     ff_h263_update_motion_val(s);
3150
3151                 if(next_block==0){ //FIXME 16 vs linesize16
3152                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3153                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3154                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3155                 }
3156
3157                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3158                     ff_MPV_decode_mb(s, s->block);
3159             } else {
3160                 int motion_x = 0, motion_y = 0;
3161                 s->mv_type=MV_TYPE_16X16;
3162                 // only one MB-Type possible
3163
3164                 switch(mb_type){
3165                 case CANDIDATE_MB_TYPE_INTRA:
3166                     s->mv_dir = 0;
3167                     s->mb_intra= 1;
3168                     motion_x= s->mv[0][0][0] = 0;
3169                     motion_y= s->mv[0][0][1] = 0;
3170                     break;
3171                 case CANDIDATE_MB_TYPE_INTER:
3172                     s->mv_dir = MV_DIR_FORWARD;
3173                     s->mb_intra= 0;
3174                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3175                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3176                     break;
3177                 case CANDIDATE_MB_TYPE_INTER_I:
3178                     s->mv_dir = MV_DIR_FORWARD;
3179                     s->mv_type = MV_TYPE_FIELD;
3180                     s->mb_intra= 0;
3181                     for(i=0; i<2; i++){
3182                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3183                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3184                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3185                     }
3186                     break;
3187                 case CANDIDATE_MB_TYPE_INTER4V:
3188                     s->mv_dir = MV_DIR_FORWARD;
3189                     s->mv_type = MV_TYPE_8X8;
3190                     s->mb_intra= 0;
3191                     for(i=0; i<4; i++){
3192                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3193                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3194                     }
3195                     break;
3196                 case CANDIDATE_MB_TYPE_DIRECT:
3197                     if (CONFIG_MPEG4_ENCODER) {
3198                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3199                         s->mb_intra= 0;
3200                         motion_x=s->b_direct_mv_table[xy][0];
3201                         motion_y=s->b_direct_mv_table[xy][1];
3202                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3203                     }
3204                     break;
3205                 case CANDIDATE_MB_TYPE_DIRECT0:
3206                     if (CONFIG_MPEG4_ENCODER) {
3207                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3208                         s->mb_intra= 0;
3209                         ff_mpeg4_set_direct_mv(s, 0, 0);
3210                     }
3211                     break;
3212                 case CANDIDATE_MB_TYPE_BIDIR:
3213                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3214                     s->mb_intra= 0;
3215                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3216                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3217                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3218                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3219                     break;
3220                 case CANDIDATE_MB_TYPE_BACKWARD:
3221                     s->mv_dir = MV_DIR_BACKWARD;
3222                     s->mb_intra= 0;
3223                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3224                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3225                     break;
3226                 case CANDIDATE_MB_TYPE_FORWARD:
3227                     s->mv_dir = MV_DIR_FORWARD;
3228                     s->mb_intra= 0;
3229                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3230                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3231                     break;
3232                 case CANDIDATE_MB_TYPE_FORWARD_I:
3233                     s->mv_dir = MV_DIR_FORWARD;
3234                     s->mv_type = MV_TYPE_FIELD;
3235                     s->mb_intra= 0;
3236                     for(i=0; i<2; i++){
3237                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3238                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3239                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3240                     }
3241                     break;
3242                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3243                     s->mv_dir = MV_DIR_BACKWARD;
3244                     s->mv_type = MV_TYPE_FIELD;
3245                     s->mb_intra= 0;
3246                     for(i=0; i<2; i++){
3247                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3248                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3249                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3250                     }
3251                     break;
3252                 case CANDIDATE_MB_TYPE_BIDIR_I:
3253                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3254                     s->mv_type = MV_TYPE_FIELD;
3255                     s->mb_intra= 0;
3256                     for(dir=0; dir<2; dir++){
3257                         for(i=0; i<2; i++){
3258                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3259                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3260                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3261                         }
3262                     }
3263                     break;
3264                 default:
3265                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3266                 }
3267
3268                 encode_mb(s, motion_x, motion_y);
3269
3270                 // RAL: Update last macroblock type
3271                 s->last_mv_dir = s->mv_dir;
3272
3273                 if (CONFIG_H263_ENCODER &&
3274                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3275                     ff_h263_update_motion_val(s);
3276
3277                 ff_MPV_decode_mb(s, s->block);
3278             }
3279
3280             /* clean the MV table in IPS frames for direct mode in B frames */
3281             if(s->mb_intra /* && I,P,S_TYPE */){
3282                 s->p_mv_table[xy][0]=0;
3283                 s->p_mv_table[xy][1]=0;
3284             }
3285
3286             if(s->flags&CODEC_FLAG_PSNR){
3287                 int w= 16;
3288                 int h= 16;
3289
3290                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3291                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3292
3293                 s->current_picture.error[0] += sse(
3294                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3295                     s->dest[0], w, h, s->linesize);
3296                 s->current_picture.error[1] += sse(
3297                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3298                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3299                 s->current_picture.error[2] += sse(
3300                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3301                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3302             }
3303             if(s->loop_filter){
3304                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3305                     ff_h263_loop_filter(s);
3306             }
3307             av_dlog(s->avctx, "MB %d %d bits\n",
3308                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3309         }
3310     }
3311
3312     //not beautiful here but we must write it before flushing so it has to be here
3313     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3314         ff_msmpeg4_encode_ext_header(s);
3315
3316     write_slice_end(s);
3317
3318     /* Send the last GOB if RTP */
3319     if (s->avctx->rtp_callback) {
3320         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3321         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3322         /* Call the RTP callback to send the last GOB */
3323         emms_c();
3324         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3325     }
3326
3327     return 0;
3328 }
3329
/*
 * Add src->field to dst->field and then clear src->field.
 * Variables named dst and src must be in scope where the macro is used.
 * The two statements are wrapped in do { } while (0) so that they stay
 * together when MERGE() is the body of an unbraced if/else or loop.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3331 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3332     MERGE(me.scene_change_score);
3333     MERGE(me.mc_mb_var_sum_temp);
3334     MERGE(me.mb_var_sum_temp);
3335 }
3336
3337 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3338     int i;
3339
3340     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3341     MERGE(dct_count[1]);
3342     MERGE(mv_bits);
3343     MERGE(i_tex_bits);
3344     MERGE(p_tex_bits);
3345     MERGE(i_count);
3346     MERGE(f_count);
3347     MERGE(b_count);
3348     MERGE(skip_count);
3349     MERGE(misc_bits);
3350     MERGE(er.error_count);
3351     MERGE(padding_bug_score);
3352     MERGE(current_picture.error[0]);
3353     MERGE(current_picture.error[1]);
3354     MERGE(current_picture.error[2]);
3355
3356     if(dst->avctx->noise_reduction){
3357         for(i=0; i<64; i++){
3358             MERGE(dct_error_sum[0][i]);
3359             MERGE(dct_error_sum[1][i]);
3360         }
3361     }
3362
3363     assert(put_bits_count(&src->pb) % 8 ==0);
3364     assert(put_bits_count(&dst->pb) % 8 ==0);
3365     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3366     flush_put_bits(&dst->pb);
3367 }
3368
3369 static int estimate_qp(MpegEncContext *s, int dry_run){
3370     if (s->next_lambda){
3371         s->current_picture_ptr->f->quality =
3372         s->current_picture.f->quality = s->next_lambda;
3373         if(!dry_run) s->next_lambda= 0;
3374     } else if (!s->fixed_qscale) {
3375         s->current_picture_ptr->f->quality =
3376         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3377         if (s->current_picture.f->quality < 0)
3378             return -1;
3379     }
3380
3381     if(s->adaptive_quant){
3382         switch(s->codec_id){
3383         case AV_CODEC_ID_MPEG4:
3384             if (CONFIG_MPEG4_ENCODER)
3385                 ff_clean_mpeg4_qscales(s);
3386             break;
3387         case AV_CODEC_ID_H263:
3388         case AV_CODEC_ID_H263P:
3389         case AV_CODEC_ID_FLV1:
3390             if (CONFIG_H263_ENCODER)
3391                 ff_clean_h263_qscales(s);
3392             break;
3393         default:
3394             ff_init_qscale_tab(s);
3395         }
3396
3397         s->lambda= s->lambda_table[0];
3398         //FIXME broken
3399     }else
3400         s->lambda = s->current_picture.f->quality;
3401     update_qscale(s);
3402     return 0;
3403 }
3404
3405 /* must be called before writing the header */
3406 static void set_frame_distances(MpegEncContext * s){
3407     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3408     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3409
3410     if(s->pict_type==AV_PICTURE_TYPE_B){
3411         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3412         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3413     }else{
3414         s->pp_time= s->time - s->last_non_b_time;
3415         s->last_non_b_time= s->time;
3416         assert(s->picture_number==0 || s->pp_time > 0);
3417     }
3418 }
3419
/**
 * Encode the current picture into s->pb.
 *
 * In order, this function: initializes timing and rounding state, selects
 * the lambda used for motion estimation, syncs the slice contexts, runs
 * motion estimation (or, for I-frames, the MB variance pass) on all slice
 * contexts, may turn a P-frame into an I-frame on a scene change, chooses
 * f_code/b_code and fixes over-long vectors, estimates the quantizer,
 * builds the MJPEG/AMV quantization matrices, writes the format-specific
 * picture header, and finally runs encode_thread() on every slice context
 * and merges the results.
 *
 * @param s              encoder context
 * @param picture_number stored in s->picture_number and passed on to the
 *                       picture header writers
 * @return 0 on success; -1 if estimate_qp() or ff_init_me() fails, or the
 *         negative value returned by ff_update_duplicate_context()
 *
 * NOTE(review): the return values of the s->avctx->execute() calls are not
 * checked here.
 */
3420 static int encode_picture(MpegEncContext *s, int picture_number)
3421 {
3422     int i, ret;
3423     int bits;
3424     int context_count = s->slice_context_count;
3425
3426     s->picture_number = picture_number;
3427
3428     /* Reset the average MB variance */
3429     s->me.mb_var_sum_temp    =
3430     s->me.mc_mb_var_sum_temp = 0;
3431
3432     /* we need to initialize some time vars before we can encode b-frames */
3433     // RAL: Condition added for MPEG1VIDEO
3434     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3435         set_frame_distances(s);
3436     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3437         ff_set_mpeg4_time(s);
3438
3439     s->me.scene_change_score=0;
3440
3441 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3442
     /* Rounding control: I-frames set no_rounding to 1 for msmpeg4 v3+ and
      * to 0 otherwise; other non-B frames toggle it when flipflop rounding
      * is enabled or the codec is H263P/MPEG4. B-frames leave it untouched. */
3443     if(s->pict_type==AV_PICTURE_TYPE_I){
3444         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3445         else                        s->no_rounding=0;
3446     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3447         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3448             s->no_rounding ^= 1;
3449     }
3450
     /* Lambda for motion estimation: in pass 2 do a dry-run qp estimate;
      * otherwise, unless CODEC_FLAG_QSCALE is set, reuse the last lambda
      * recorded for this picture type (B) or for the last non-B type. */
3451     if(s->flags & CODEC_FLAG_PASS2){
3452         if (estimate_qp(s,1) < 0)
3453             return -1;
3454         ff_get_2pass_fcode(s);
3455     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3456         if(s->pict_type==AV_PICTURE_TYPE_B)
3457             s->lambda= s->last_lambda_for[s->pict_type];
3458         else
3459             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3460         update_qscale(s);
3461     }
3462
     /* For everything except AMV/MJPEG the chroma intra matrices are made to
      * point at the luma ones; pointers that differ are released first. */
3463     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3464         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3465         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3466         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3467         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3468     }
3469
3470     s->mb_intra=0; //for the rate distortion & bit compare functions
     /* sync slice contexts 1..context_count-1 from s; abort on failure */
3471     for(i=1; i<context_count; i++){
3472         ret = ff_update_duplicate_context(s->thread_context[i], s);
3473         if (ret < 0)
3474             return ret;
3475     }
3476
3477     if(ff_init_me(s)<0)
3478         return -1;
3479
3480     /* Estimate motion for every MB */
3481     if(s->pict_type != AV_PICTURE_TYPE_I){
         /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3482         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3483         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3484         if (s->pict_type != AV_PICTURE_TYPE_B) {
             /* optional pre-pass: when pre_me is set and the last non-B
              * picture was an I-frame, or unconditionally when pre_me == 2 */
3485             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3486                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3487             }
3488         }
3489
3490         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3491     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3492         /* I-Frame */
3493         for(i=0; i<s->mb_stride*s->mb_height; i++)
3494             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3495
3496         if(!s->fixed_qscale){
3497             /* finding spatial complexity for I-frame rate control */
3498             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3499         }
3500     }
     /* gather the ME statistics of the other slice contexts into s and
      * publish the variance sums on both picture structures */
3501     for(i=1; i<context_count; i++){
3502         merge_context_after_me(s, s->thread_context[i]);
3503     }
3504     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3505     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3506     emms_c();
3507
     /* Scene change: a P-frame whose accumulated score exceeds the threshold
      * is re-typed as an I-frame and every MB is marked as intra. */
3508     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3509         s->pict_type= AV_PICTURE_TYPE_I;
3510         for(i=0; i<s->mb_stride*s->mb_height; i++)
3511             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3512         if(s->msmpeg4_version >= 3)
3513             s->no_rounding=1;
3514         av_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3515                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3516     }
3517
     /* f_code/b_code selection and fixing of over-long vectors; skipped
      * entirely when umvplus is set */
3518     if(!s->umvplus){
3519         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3520             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3521
3522             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3523                 int a,b;
3524                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3525                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3526                 s->f_code= FFMAX3(s->f_code, a, b);
3527             }
3528
3529             ff_fix_long_p_mvs(s);
3530             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3531             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3532                 int j;
3533                 for(i=0; i<2; i++){
3534                     for(j=0; j<2; j++)
3535                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3536                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3537                 }
3538             }
3539         }
3540
3541         if(s->pict_type==AV_PICTURE_TYPE_B){
3542             int a, b;
3543
             /* f_code covers the forward tables, b_code the backward ones */
3544             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3545             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3546             s->f_code = FFMAX(a, b);
3547
3548             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3549             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3550             s->b_code = FFMAX(a, b);
3551
3552             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3553             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3554             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3555             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3556             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3557                 int dir, j;
3558                 for(dir=0; dir<2; dir++){
3559                     for(i=0; i<2; i++){
3560                         for(j=0; j<2; j++){
3561                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3562                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3563                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3564                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3565                         }
3566                     }
3567                 }
3568             }
3569         }
3570     }
3571
     /* final (non dry-run) quantizer estimate for this picture */
3572     if (estimate_qp(s, 0) < 0)
3573         return -1;
3574
3575     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3576         s->qscale= 3; //reduce clipping problems
3577
     /* MJPEG: build the luma/chroma intra matrices from the default MPEG-1
      * matrix or the user-supplied ones, scaled by qscale, then convert
      * them and pin qscale to 8. */
3578     if (s->out_format == FMT_MJPEG) {
3579         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3580         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3581
3582         if (s->avctx->intra_matrix) {
3583             chroma_matrix =
3584             luma_matrix = s->avctx->intra_matrix;
3585         }
3586         if (s->avctx->chroma_intra_matrix)
3587             chroma_matrix = s->avctx->chroma_intra_matrix;
3588
3589         /* for mjpeg, we do include qscale in the matrix */
3590         for(i=1;i<64;i++){
3591             int j = s->idsp.idct_permutation[i];
3592
3593             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3594             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3595         }
         /* entry 0 (DC) is not scaled by qscale; it comes from the DC scale table */
3596         s->y_dc_scale_table=
3597         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3598         s->chroma_intra_matrix[0] =
3599         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3600         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3601                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3602         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3603                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3604         s->qscale= 8;
3605     }
     /* AMV: fixed matrices taken from rows 5*2+0 (luma) and 5*2+1 (chroma)
      * of sp5x_quant_table, with constant DC scale tables of 13 and 14. */
3606     if(s->codec_id == AV_CODEC_ID_AMV){
3607         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3608         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3609         for(i=1;i<64;i++){
3610             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3611
3612             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3613             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3614         }
3615         s->y_dc_scale_table= y;
3616         s->c_dc_scale_table= c;
3617         s->intra_matrix[0] = 13;
3618         s->chroma_intra_matrix[0] = 14;
3619         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3620                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3621         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3622                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3623         s->qscale= 8;
3624     }
3625
3626     //FIXME var duplication
     /* mirror key_frame and pict_type on both picture structures */
3627     s->current_picture_ptr->f->key_frame =
3628     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3629     s->current_picture_ptr->f->pict_type =
3630     s->current_picture.f->pict_type = s->pict_type;
3631
3632     if (s->current_picture.f->key_frame)
3633         s->picture_in_gop_number=0;
3634
3635     s->mb_x = s->mb_y = 0;
     /* remember the bit position so the header size can be measured below */
3636     s->last_bits= put_bits_count(&s->pb);
     /* write the picture header for the active output format / codec */
3637     switch(s->out_format) {
3638     case FMT_MJPEG:
3639         if (CONFIG_MJPEG_ENCODER)
3640             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3641                                            s->intra_matrix, s->chroma_intra_matrix);
3642         break;
3643     case FMT_H261:
3644         if (CONFIG_H261_ENCODER)
3645             ff_h261_encode_picture_header(s, picture_number);
3646         break;
3647     case FMT_H263:
3648         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3649             ff_wmv2_encode_picture_header(s, picture_number);
3650         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3651             ff_msmpeg4_encode_picture_header(s, picture_number);
3652         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3653             ff_mpeg4_encode_picture_header(s, picture_number);
3654         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3655             ff_rv10_encode_picture_header(s, picture_number);
3656         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3657             ff_rv20_encode_picture_header(s, picture_number);
3658         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3659             ff_flv_encode_picture_header(s, picture_number);
3660         else if (CONFIG_H263_ENCODER)
3661             ff_h263_encode_picture_header(s, picture_number);
3662         break;
3663     case FMT_MPEG1:
3664         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3665             ff_mpeg1_encode_picture_header(s, picture_number);
3666         break;
3667     default:
3668         av_assert0(0);
3669     }
     /* header_bits = number of bits the header writer just produced */
3670     bits= put_bits_count(&s->pb);
3671     s->header_bits= bits - s->last_bits;
3672
     /* refresh the other slice contexts from s, encode all slices, then
      * merge the other contexts' results (statistics and bits) into s */
3673     for(i=1; i<context_count; i++){
3674         update_duplicate_context_after_me(s->thread_context[i], s);
3675     }
3676     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3677     for(i=1; i<context_count; i++){
3678         merge_context_after_encode(s, s->thread_context[i]);
3679     }
3680     emms_c();
3681     return 0;
3682 }
3683
3684 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3685     const int intra= s->mb_intra;
3686     int i;
3687
3688     s->dct_count[intra]++;
3689
3690     for(i=0; i<64; i++){
3691         int level= block[i];
3692
3693         if(level){
3694             if(level>0){
3695                 s->dct_error_sum[intra][i] += level;
3696                 level -= s->dct_offset[intra][i];
3697                 if(level<0) level=0;
3698             }else{
3699                 s->dct_error_sum[intra][i] -= level;
3700                 level += s->dct_offset[intra][i];
3701                 if(level>0) level=0;
3702             }
3703             block[i]= level;
3704         }
3705     }
3706 }
3707
3708 static int dct_quantize_trellis_c(MpegEncContext *s,
3709                                   int16_t *block, int n,
3710                                   int qscale, int *overflow){
3711     const int *qmat;
3712     const uint8_t *scantable= s->intra_scantable.scantable;
3713     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3714     int max=0;
3715     unsigned int threshold1, threshold2;
3716     int bias=0;
3717     int run_tab[65];
3718     int level_tab[65];
3719     int score_tab[65];
3720     int survivor[65];
3721     int survivor_count;
3722     int last_run=0;
3723     int last_level=0;
3724     int last_score= 0;
3725     int last_i;
3726     int coeff[2][64];
3727     int coeff_count[64];
3728     int qmul, qadd, start_i, last_non_zero, i, dc;
3729     const int esc_length= s->ac_esc_length;
3730     uint8_t * length;
3731     uint8_t * last_length;
3732     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3733
3734     s->fdsp.fdct(block);
3735
3736     if(s->dct_error_sum)
3737         s->denoise_dct(s, block);
3738     qmul= qscale*16;
3739     qadd= ((qscale-1)|1)*8;
3740
3741     if (s->mb_intra) {
3742         int q;
3743         if (!s->h263_aic) {
3744             if (n < 4)
3745                 q = s->y_dc_scale;
3746             else
3747                 q = s->c_dc_scale;
3748             q = q << 3;
3749         } else{
3750             /* For AIC we skip quant/dequant of INTRADC */
3751             q = 1 << 3;
3752             qadd=0;
3753         }
3754
3755         /* note: block[0] is assumed to be positive */
3756         block[0] = (block[0] + (q >> 1)) / q;
3757         start_i = 1;
3758         last_non_zero = 0;
3759         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3760         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3761             bias= 1<<(QMAT_SHIFT-1);
3762         length     = s->intra_ac_vlc_length;
3763         last_length= s->intra_ac_vlc_last_length;
3764     } else {
3765         start_i = 0;
3766         last_non_zero = -1;
3767         qmat = s->q_inter_matrix[qscale];
3768         length     = s->inter_ac_vlc_length;
3769         last_length= s->inter_ac_vlc_last_length;
3770     }
3771     last_i= start_i;
3772
3773     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3774     threshold2= (threshold1<<1);
3775
3776     for(i=63; i>=start_i; i--) {
3777         const int j = scantable[i];
3778         int level = block[j] * qmat[j];
3779
3780         if(((unsigned)(level+threshold1))>threshold2){
3781             last_non_zero = i;
3782             break;
3783         }
3784     }
3785
3786     for(i=start_i; i<=last_non_zero; i++) {
3787         const int j = scantable[i];
3788         int level = block[j] * qmat[j];
3789
3790 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3791 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3792         if(((unsigned)(level+threshold1))>threshold2){
3793             if(level>0){
3794                 level= (bias + level)>>QMAT_SHIFT;
3795                 coeff[0][i]= level;
3796                 coeff[1][i]= level-1;
3797 //                coeff[2][k]= level-2;
3798             }else{
3799                 level= (bias - level)>>QMAT_SHIFT;
3800                 coeff[0][i]= -level;
3801                 coeff[1][i]= -level+1;
3802 //                coeff[2][k]= -level+2;
3803             }
3804             coeff_count[i]= FFMIN(level, 2);
3805             av_assert2(coeff_count[i]);
3806             max |=level;
3807         }else{
3808             coeff[0][i]= (level>>31)|1;
3809             coeff_count[i]= 1;
3810         }
3811     }
3812
3813     *overflow= s->max_qcoeff < max; //overflow might have happened
3814
3815     if(last_non_zero < start_i){
3816         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3817         return last_non_zero;
3818     }
3819
3820     score_tab[start_i]= 0;
3821     survivor[0]= start_i;
3822     survivor_count= 1;
3823
3824     for(i=start_i; i<=last_non_zero; i++){
3825         int level_index, j, zero_distortion;
3826         int dct_coeff= FFABS(block[ scantable[i] ]);
3827         int best_score=256*256*256*120;
3828
3829         if (s->fdsp.fdct == ff_fdct_ifast)
3830             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3831         zero_distortion= dct_coeff*dct_coeff;
3832
3833         for(level_index=0; level_index < coeff_count[i]; level_index++){
3834             int distortion;
3835             int level= coeff[level_index][i];
3836             const int alevel= FFABS(level);
3837             int unquant_coeff;
3838
3839             av_assert2(level);
3840
3841             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3842                 unquant_coeff= alevel*qmul + qadd;
3843             }else{ //MPEG1
3844                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3845                 if(s->mb_intra){
3846                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3847                         unquant_coeff =   (unquant_coeff - 1) | 1;
3848                 }else{
3849                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3850                         unquant_coeff =   (unquant_coeff - 1) | 1;
3851                 }
3852                 unquant_coeff<<= 3;
3853             }
3854
3855             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3856             level+=64;
3857             if((level&(~127)) == 0){
3858                 for(j=survivor_count-1; j>=0; j--){
3859                     int run= i - survivor[j];
3860                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3861                     score += score_tab[i-run];
3862
3863                     if(score < best_score){
3864                         best_score= score;
3865                         run_tab[i+1]= run;
3866                         level_tab[i+1]= level-64;
3867                     }
3868                 }
3869
3870                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3871                     for(j=survivor_count-1; j>=0; j--){
3872                         int run= i - survivor[j];
3873                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3874                         score += score_tab[i-run];
3875                         if(score < last_score){
3876                             last_score= score;
3877                             last_run= run;
3878                             last_level= level-64;
3879                             last_i= i+1;
3880                         }
3881                     }
3882                 }
3883             }else{
3884                 distortion += esc_length*lambda;
3885                 for(j=survivor_count-1; j>=0; j--){
3886                     int run= i - survivor[j];
3887                     int score= distortion + score_tab[i-run];
3888
3889                     if(score < best_score){
3890                         best_score= score;
3891                         run_tab[i+1]= run;
3892                         level_tab[i+1]= level-64;
3893                     }
3894                 }
3895
3896                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3897                   for(j=survivor_count-1; j>=0; j--){
3898                         int run= i - survivor[j];
3899                         int score= distortion + score_tab[i-run];
3900                         if(score < last_score){
3901                             last_score= score;
3902                             last_run= run;
3903                             last_level= level-64;
3904                             last_i= i+1;
3905                         }
3906                     }
3907                 }
3908             }
3909         }
3910
3911         score_tab[i+1]= best_score;
3912
3913         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3914         if(last_non_zero <= 27){
3915             for(; survivor_count; survivor_count--){
3916                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3917                     break;
3918             }
3919         }else{
3920             for(; survivor_count; survivor_count--){
3921                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3922                     break;
3923             }
3924         }
3925
3926         survivor[ survivor_count++ ]= i+1;
3927     }
3928
3929     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3930         last_score= 256*256*256*120;
3931         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3932             int score= score_tab[i];
3933             if(i) score += lambda*2; //FIXME exacter?
3934
3935             if(score < last_score){
3936                 last_score= score;
3937                 last_i= i;
3938                 last_level= level_tab[i];
3939                 last_run= run_tab[i];
3940             }
3941         }
3942     }
3943
3944     s->coded_score[n] = last_score;
3945
3946     dc= FFABS(block[0]);
3947     last_non_zero= last_i - 1;
3948     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3949
3950     if(last_non_zero < start_i)
3951         return last_non_zero;
3952
3953     if(last_non_zero == 0 && start_i == 0){
3954         int best_level= 0;
3955         int best_score= dc * dc;
3956
3957         for(i=0; i<coeff_count[0]; i++){
3958             int level= coeff[i][0];
3959             int alevel= FFABS(level);
3960             int unquant_coeff, score, distortion;
3961
3962             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3963                     unquant_coeff= (alevel*qmul + qadd)>>3;
3964             }else{ //MPEG1
3965                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3966                     unquant_coeff =   (unquant_coeff - 1) | 1;
3967             }
3968             unquant_coeff = (unquant_coeff + 4) >> 3;
3969             unquant_coeff<<= 3 + 3;
3970
3971             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3972             level+=64;
3973             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3974             else                    score= distortion + esc_length*lambda;
3975
3976             if(score < best_score){
3977                 best_score= score;
3978                 best_level= level - 64;
3979             }
3980         }
3981         block[0]= best_level;
3982         s->coded_score[n] = best_score - dc*dc;
3983         if(best_level == 0) return -1;
3984         else                return last_non_zero;
3985     }
3986
3987     i= last_i;
3988     av_assert2(last_level);
3989
3990     block[ perm_scantable[last_non_zero] ]= last_level;
3991     i -= last_run + 1;
3992
3993     for(; i>start_i; i -= run_tab[i] + 1){
3994         block[ perm_scantable[i-1] ]= level_tab[i];
3995     }
3996
3997     return last_non_zero;
3998 }
3999
4000 //#define REFINE_STATS 1
4001 static int16_t basis[64][64];
4002
4003 static void build_basis(uint8_t *perm){
4004     int i, j, x, y;
4005     emms_c();
4006     for(i=0; i<8; i++){
4007         for(j=0; j<8; j++){
4008             for(y=0; y<8; y++){
4009                 for(x=0; x<8; x++){
4010                     double s= 0.25*(1<<BASIS_SHIFT);
4011                     int index= 8*i + j;
4012                     int perm_index= perm[index];
4013                     if(i==0) s*= sqrt(0.5);
4014                     if(j==0) s*= sqrt(0.5);
4015                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4016                 }
4017             }
4018         }
4019     }
4020 }
4021
4022 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4023                         int16_t *block, int16_t *weight, int16_t *orig,
4024                         int n, int qscale){
4025     int16_t rem[64];
4026     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4027     const uint8_t *scantable= s->intra_scantable.scantable;
4028     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4029 //    unsigned int threshold1, threshold2;
4030 //    int bias=0;
4031     int run_tab[65];
4032     int prev_run=0;
4033     int prev_level=0;
4034     int qmul, qadd, start_i, last_non_zero, i, dc;
4035     uint8_t * length;
4036     uint8_t * last_length;
4037     int lambda;
4038     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4039 #ifdef REFINE_STATS
4040 static int count=0;
4041 static int after_last=0;
4042 static int to_zero=0;
4043 static int from_zero=0;
4044 static int raise=0;
4045 static int lower=0;
4046 static int messed_sign=0;
4047 #endif
4048
4049     if(basis[0][0] == 0)
4050         build_basis(s->idsp.idct_permutation);
4051
4052     qmul= qscale*2;
4053     qadd= (qscale-1)|1;
4054     if (s->mb_intra) {
4055         if (!s->h263_aic) {
4056             if (n < 4)
4057                 q = s->y_dc_scale;
4058             else
4059                 q = s->c_dc_scale;
4060         } else{
4061             /* For AIC we skip quant/dequant of INTRADC */
4062             q = 1;
4063             qadd=0;
4064         }
4065         q <<= RECON_SHIFT-3;
4066         /* note: block[0] is assumed to be positive */
4067         dc= block[0]*q;
4068 //        block[0] = (block[0] + (q >> 1)) / q;
4069         start_i = 1;
4070 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4071 //            bias= 1<<(QMAT_SHIFT-1);
4072         length     = s->intra_ac_vlc_length;
4073         last_length= s->intra_ac_vlc_last_length;
4074     } else {
4075         dc= 0;
4076         start_i = 0;
4077         length     = s->inter_ac_vlc_length;
4078         last_length= s->inter_ac_vlc_last_length;
4079     }
4080     last_non_zero = s->block_last_index[n];
4081
4082 #ifdef REFINE_STATS
4083 {START_TIMER
4084 #endif
4085     dc += (1<<(RECON_SHIFT-1));
4086     for(i=0; i<64; i++){
4087         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4088     }
4089 #ifdef REFINE_STATS
4090 STOP_TIMER("memset rem[]")}
4091 #endif
4092     sum=0;
4093     for(i=0; i<64; i++){
4094         int one= 36;
4095         int qns=4;
4096         int w;
4097
4098         w= FFABS(weight[i]) + qns*one;
4099         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4100
4101         weight[i] = w;
4102 //        w=weight[i] = (63*qns + (w/2)) / w;
4103
4104         av_assert2(w>0);
4105         av_assert2(w<(1<<6));
4106         sum += w*w;
4107     }
4108     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4109 #ifdef REFINE_STATS
4110 {START_TIMER
4111 #endif
4112     run=0;
4113     rle_index=0;
4114     for(i=start_i; i<=last_non_zero; i++){
4115         int j= perm_scantable[i];
4116         const int level= block[j];
4117         int coeff;
4118
4119         if(level){
4120             if(level<0) coeff= qmul*level - qadd;
4121             else        coeff= qmul*level + qadd;
4122             run_tab[rle_index++]=run;
4123             run=0;
4124
4125             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4126         }else{
4127             run++;
4128         }
4129     }
4130 #ifdef REFINE_STATS
4131 if(last_non_zero>0){
4132 STOP_TIMER("init rem[]")
4133 }
4134 }
4135
4136 {START_TIMER
4137 #endif
4138     for(;;){
4139         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4140         int best_coeff=0;
4141         int best_change=0;
4142         int run2, best_unquant_change=0, analyze_gradient;
4143 #ifdef REFINE_STATS
4144 {START_TIMER
4145 #endif
4146         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4147
4148         if(analyze_gradient){
4149 #ifdef REFINE_STATS
4150 {START_TIMER
4151 #endif
4152             for(i=0; i<64; i++){
4153                 int w= weight[i];
4154
4155                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4156             }
4157 #ifdef REFINE_STATS
4158 STOP_TIMER("rem*w*w")}
4159 {START_TIMER
4160 #endif
4161             s->fdsp.fdct(d1);
4162 #ifdef REFINE_STATS
4163 STOP_TIMER("dct")}
4164 #endif
4165         }
4166
4167         if(start_i){
4168             const int level= block[0];
4169             int change, old_coeff;
4170
4171             av_assert2(s->mb_intra);
4172
4173             old_coeff= q*level;
4174
4175             for(change=-1; change<=1; change+=2){
4176                 int new_level= level + change;
4177                 int score, new_coeff;
4178
4179                 new_coeff= q*new_level;
4180                 if(new_coeff >= 2048 || new_coeff < 0)
4181                     continue;
4182
4183                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4184                                                   new_coeff - old_coeff);
4185                 if(score<best_score){
4186                     best_score= score;
4187                     best_coeff= 0;
4188                     best_change= change;
4189                     best_unquant_change= new_coeff - old_coeff;
4190                 }
4191             }
4192         }
4193
4194         run=0;
4195         rle_index=0;
4196         run2= run_tab[rle_index++];
4197         prev_level=0;
4198         prev_run=0;
4199
4200         for(i=start_i; i<64; i++){
4201             int j= perm_scantable[i];
4202             const int level= block[j];
4203             int change, old_coeff;
4204
4205             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4206                 break;
4207
4208             if(level){
4209                 if(level<0) old_coeff= qmul*level - qadd;
4210                 else        old_coeff= qmul*level + qadd;
4211                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4212             }else{
4213                 old_coeff=0;
4214                 run2--;
4215                 av_assert2(run2>=0 || i >= last_non_zero );
4216             }
4217
4218             for(change=-1; change<=1; change+=2){
4219                 int new_level= level + change;
4220                 int score, new_coeff, unquant_change;
4221
4222                 score=0;
4223                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4224                    continue;
4225
4226                 if(new_level){
4227                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4228                     else            new_coeff= qmul*new_level + qadd;
4229                     if(new_coeff >= 2048 || new_coeff <= -2048)
4230                         continue;
4231                     //FIXME check for overflow
4232
4233                     if(level){
4234                         if(level < 63 && level > -63){
4235                             if(i < last_non_zero)
4236                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4237                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4238                             else
4239                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4240                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4241                         }
4242                     }else{
4243                         av_assert2(FFABS(new_level)==1);
4244
4245                         if(analyze_gradient){
4246                             int g= d1[ scantable[i] ];
4247                             if(g && (g^new_level) >= 0)
4248                                 continue;
4249                         }
4250
4251                         if(i < last_non_zero){
4252                             int next_i= i + run2 + 1;
4253                             int next_level= block[ perm_scantable[next_i] ] + 64;
4254
4255                             if(next_level&(~127))
4256                                 next_level= 0;
4257
4258                             if(next_i < last_non_zero)
4259                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4260                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4261                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4262                             else
4263                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4264                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4265                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4266                         }else{
4267                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4268                             if(prev_level){
4269                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4270                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4271                             }
4272                         }
4273                     }
4274                 }else{
4275                     new_coeff=0;
4276                     av_assert2(FFABS(level)==1);
4277
4278                     if(i < last_non_zero){
4279                         int next_i= i + run2 + 1;
4280                         int next_level= block[ perm_scantable[next_i] ] + 64;
4281
4282                         if(next_level&(~127))
4283                             next_level= 0;
4284
4285                         if(next_i < last_non_zero)
4286                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4287                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4288                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4289                         else
4290                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4291                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4292                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4293                     }else{
4294                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4295                         if(prev_level){
4296                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4297                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4298                         }
4299                     }
4300                 }
4301
4302                 score *= lambda;
4303
4304                 unquant_change= new_coeff - old_coeff;
4305                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4306
4307                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4308                                                    unquant_change);
4309                 if(score<best_score){
4310                     best_score= score;
4311                     best_coeff= i;
4312                     best_change= change;
4313                     best_unquant_change= unquant_change;
4314                 }
4315             }
4316             if(level){
4317                 prev_level= level + 64;
4318                 if(prev_level&(~127))
4319                     prev_level= 0;
4320                 prev_run= run;
4321                 run=0;
4322             }else{
4323                 run++;
4324             }
4325         }
4326 #ifdef REFINE_STATS
4327 STOP_TIMER("iterative step")}
4328 #endif
4329
4330         if(best_change){
4331             int j= perm_scantable[ best_coeff ];
4332
4333             block[j] += best_change;
4334
4335             if(best_coeff > last_non_zero){
4336                 last_non_zero= best_coeff;
4337                 av_assert2(block[j]);
4338 #ifdef REFINE_STATS
4339 after_last++;
4340 #endif
4341             }else{
4342 #ifdef REFINE_STATS
4343 if(block[j]){
4344     if(block[j] - best_change){
4345         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4346             raise++;
4347         }else{
4348             lower++;
4349         }
4350     }else{
4351         from_zero++;
4352     }
4353 }else{
4354     to_zero++;
4355 }
4356 #endif
4357                 for(; last_non_zero>=start_i; last_non_zero--){
4358                     if(block[perm_scantable[last_non_zero]])
4359                         break;
4360                 }
4361             }
4362 #ifdef REFINE_STATS
4363 count++;
4364 if(256*256*256*64 % count == 0){
4365     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4366 }
4367 #endif
4368             run=0;
4369             rle_index=0;
4370             for(i=start_i; i<=last_non_zero; i++){
4371                 int j= perm_scantable[i];
4372                 const int level= block[j];
4373
4374                  if(level){
4375                      run_tab[rle_index++]=run;
4376                      run=0;
4377                  }else{
4378                      run++;
4379                  }
4380             }
4381
4382             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4383         }else{
4384             break;
4385         }
4386     }
4387 #ifdef REFINE_STATS
4388 if(last_non_zero>0){
4389 STOP_TIMER("iterative search")
4390 }
4391 }
4392 #endif
4393
4394     return last_non_zero;
4395 }
4396
4397 int ff_dct_quantize_c(MpegEncContext *s,
4398                         int16_t *block, int n,
4399                         int qscale, int *overflow)
4400 {
4401     int i, j, level, last_non_zero, q, start_i;
4402     const int *qmat;
4403     const uint8_t *scantable= s->intra_scantable.scantable;
4404     int bias;
4405     int max=0;
4406     unsigned int threshold1, threshold2;
4407
4408     s->fdsp.fdct(block);
4409
4410     if(s->dct_error_sum)
4411         s->denoise_dct(s, block);
4412
4413     if (s->mb_intra) {
4414         if (!s->h263_aic) {
4415             if (n < 4)
4416                 q = s->y_dc_scale;
4417             else
4418                 q = s->c_dc_scale;
4419             q = q << 3;
4420         } else
4421             /* For AIC we skip quant/dequant of INTRADC */
4422             q = 1 << 3;
4423
4424         /* note: block[0] is assumed to be positive */
4425         block[0] = (block[0] + (q >> 1)) / q;
4426         start_i = 1;
4427         last_non_zero = 0;
4428         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4429         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4430     } else {
4431         start_i = 0;
4432         last_non_zero = -1;
4433         qmat = s->q_inter_matrix[qscale];
4434         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4435     }
4436     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4437     threshold2= (threshold1<<1);
4438     for(i=63;i>=start_i;i--) {
4439         j = scantable[i];
4440         level = block[j] * qmat[j];
4441
4442         if(((unsigned)(level+threshold1))>threshold2){
4443             last_non_zero = i;
4444             break;
4445         }else{
4446             block[j]=0;
4447         }
4448     }
4449     for(i=start_i; i<=last_non_zero; i++) {
4450         j = scantable[i];
4451         level = block[j] * qmat[j];
4452
4453 //        if(   bias+level >= (1<<QMAT_SHIFT)
4454 //           || bias-level >= (1<<QMAT_SHIFT)){
4455         if(((unsigned)(level+threshold1))>threshold2){
4456             if(level>0){
4457                 level= (bias + level)>>QMAT_SHIFT;
4458                 block[j]= level;
4459             }else{
4460                 level= (bias - level)>>QMAT_SHIFT;
4461                 block[j]= -level;
4462             }
4463             max |=level;
4464         }else{
4465             block[j]=0;
4466         }
4467     }
4468     *overflow= s->max_qcoeff < max; //overflow might have happened
4469
4470     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4471     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4472         ff_block_permute(block, s->idsp.idct_permutation,
4473                          scantable, last_non_zero);
4474
4475     return last_non_zero;
4476 }
4477
4478 #define OFFSET(x) offsetof(MpegEncContext, x)
4479 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4480 static const AVOption h263_options[] = {
4481     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4482     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4483     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4484     FF_MPV_COMMON_OPTS
4485     { NULL },
4486 };
4487
4488 static const AVClass h263_class = {
4489     .class_name = "H.263 encoder",
4490     .item_name  = av_default_item_name,
4491     .option     = h263_options,
4492     .version    = LIBAVUTIL_VERSION_INT,
4493 };
4494
4495 AVCodec ff_h263_encoder = {
4496     .name           = "h263",
4497     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4498     .type           = AVMEDIA_TYPE_VIDEO,
4499     .id             = AV_CODEC_ID_H263,
4500     .priv_data_size = sizeof(MpegEncContext),
4501     .init           = ff_MPV_encode_init,
4502     .encode2        = ff_MPV_encode_picture,
4503     .close          = ff_MPV_encode_end,
4504     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4505     .priv_class     = &h263_class,
4506 };
4507
4508 static const AVOption h263p_options[] = {
4509     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4510     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4511     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4512     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4513     FF_MPV_COMMON_OPTS
4514     { NULL },
4515 };
4516 static const AVClass h263p_class = {
4517     .class_name = "H.263p encoder",
4518     .item_name  = av_default_item_name,
4519     .option     = h263p_options,
4520     .version    = LIBAVUTIL_VERSION_INT,
4521 };
4522
4523 AVCodec ff_h263p_encoder = {
4524     .name           = "h263p",
4525     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4526     .type           = AVMEDIA_TYPE_VIDEO,
4527     .id             = AV_CODEC_ID_H263P,
4528     .priv_data_size = sizeof(MpegEncContext),
4529     .init           = ff_MPV_encode_init,
4530     .encode2        = ff_MPV_encode_picture,
4531     .close          = ff_MPV_encode_end,
4532     .capabilities   = CODEC_CAP_SLICE_THREADS,
4533     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4534     .priv_class     = &h263p_class,
4535 };
4536
4537 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4538
4539 AVCodec ff_msmpeg4v2_encoder = {
4540     .name           = "msmpeg4v2",
4541     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4542     .type           = AVMEDIA_TYPE_VIDEO,
4543     .id             = AV_CODEC_ID_MSMPEG4V2,
4544     .priv_data_size = sizeof(MpegEncContext),
4545     .init           = ff_MPV_encode_init,
4546     .encode2        = ff_MPV_encode_picture,
4547     .close          = ff_MPV_encode_end,
4548     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4549     .priv_class     = &msmpeg4v2_class,
4550 };
4551
4552 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4553
4554 AVCodec ff_msmpeg4v3_encoder = {
4555     .name           = "msmpeg4",
4556     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4557     .type           = AVMEDIA_TYPE_VIDEO,
4558     .id             = AV_CODEC_ID_MSMPEG4V3,
4559     .priv_data_size = sizeof(MpegEncContext),
4560     .init           = ff_MPV_encode_init,
4561     .encode2        = ff_MPV_encode_picture,
4562     .close          = ff_MPV_encode_end,
4563     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4564     .priv_class     = &msmpeg4v3_class,
4565 };
4566
4567 FF_MPV_GENERIC_CLASS(wmv1)
4568
4569 AVCodec ff_wmv1_encoder = {
4570     .name           = "wmv1",
4571     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4572     .type           = AVMEDIA_TYPE_VIDEO,
4573     .id             = AV_CODEC_ID_WMV1,
4574     .priv_data_size = sizeof(MpegEncContext),
4575     .init           = ff_MPV_encode_init,
4576     .encode2        = ff_MPV_encode_picture,
4577     .close          = ff_MPV_encode_end,
4578     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4579     .priv_class     = &wmv1_class,
4580 };