1 /********************************************************************
3 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
9 * by the Xiph.Org Foundation http://www.xiph.org/ *
11 ********************************************************************
16 ********************************************************************/
typedef struct oc_mcenc_ctx oc_mcenc_ctx;

/*Temporary state used for motion estimation.
  NOTE(review): the struct's opening/closing braces and several members are
   elided in this view; only a fragment is visible below.*/
/*The candidate motion vectors.
  Stored in half-pel units until truncated to full-pel positions by
   oc_mcenc_find_candidates() (see the OC_DIV2 loop there).*/
int candidates[13][2];
/*The start of the Set B candidates.*/
/*The total number of candidates.*/

/*The maximum Y plane SAD value for accepting the median predictor.*/
#define OC_YSAD_THRESH1 (256)
/*The amount to right shift the minimum error by when inflating it for
   computing the second maximum Y plane SAD threshold.*/
#define OC_YSAD_THRESH2_SCALE_BITS (4)
/*The amount to add to the second maximum Y plane threshold when inflating
   it.*/
#define OC_YSAD_THRESH2_OFFSET (64)
/*The vector offsets in the X direction for each search site in the square
   pattern.*/
static const int OC_SQUARE_DX[9]={-1,0,1,-1,0,1,-1,0,1};
/*The vector offsets in the Y direction for each search site in the square
   pattern.
  Together with OC_SQUARE_DX this enumerates the 3x3 grid; index 4 is the
   center (0,0).*/
static const int OC_SQUARE_DY[9]={-1,-1,-1,0,0,0,1,1,1};
/*The number of sites to search for each boundary condition in the square
   pattern.
  Bit flags for the boundary conditions are as follows:
  NOTE(review): the flag legend is elided in this view; from the index
   expression used by callers (OC_DIV16 of +/-vec), bit 0/bit 1 indicate the
   vector is clamped at the low/high X edge and bit 2/bit 3 the low/high Y
   edge -- confirm against the full source.*/
static const int OC_SQUARE_NSITES[11]={8,5,5,0,5,3,3,0,5,3,3};
/*The list of sites to search for each boundary condition in the square
   pattern.
  NOTE(review): the initializer rows are elided in this view; only the
   per-row range comments remain.*/
static const int OC_SQUARE_SITES[11][8]={
/* -15.5<dx<15(.5), -15.5<dy<15(.5)*/
/*-15.5==dx, -15.5<dy<15(.5)*/
/* dx==15(.5), -15.5<dy<15(.5)*/
/*-15.5==dx==15(.5), -15.5<dy<15(.5)*/
/* -15.5<dx<15(.5), -15.5==dy*/
/*-15.5==dx, -15.5==dy*/
/* dx==15(.5), -15.5==dy*/
/*-15.5==dx==15(.5), -15.5==dy*/
/* -15.5<dx<15(.5), dy==15(.5)*/
/*-15.5==dx, dy==15(.5)*/
/* dx==15(.5), dy==15(.5)*/
/*Gathers the list of candidate motion vectors to seed the search for a
   macro block.
  Set A holds the median-predictor slot plus the vectors of adjacent
   (causal) macro blocks, the accumulated drop-frame vector, the previous
   vector for this macro block (plus the accumulator), and (0,0).
  Set B holds accelerated (linearly extrapolated) predictors.
  All candidates are clamped to [-31,31] (half-pel units) and then truncated
   to full-pel positions.
  _enc:   The encoding context.
  _mcenc: The motion estimation context to fill.
  _accum: Drop frame/golden MV accumulators.
  _mbi:   The macro block index.
  _frame: The frame to predict from (OC_FRAME_PREV or OC_FRAME_GOLD).
  NOTE(review): declarations, several closing braces, and the set B loop
   header are elided in this view.*/
static void oc_mcenc_find_candidates(oc_enc_ctx *_enc,oc_mcenc_ctx *_mcenc,
 int _accum[2],int _mbi,int _frame){
  /*Skip a position to store the median predictor in.*/
  if(embs[_mbi].ncneighbors>0){
    /*Fill in the first part of set A: the vectors from adjacent blocks.*/
    for(i=0;i<embs[_mbi].ncneighbors;i++){
      nmbi=embs[_mbi].cneighbors[i];
      _mcenc->candidates[ncandidates][0]=embs[nmbi].analysis_mv[0][_frame][0];
      _mcenc->candidates[ncandidates][1]=embs[nmbi].analysis_mv[0][_frame][1];
  /*Add a few additional vectors to set A: the vectors used in the previous
     frames and the (0,0) vector.*/
  /*The accumulated drop-frame offset, clamped to the legal MV range.*/
  _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,_accum[0],31);
  _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,_accum[1],31);
  /*The previous vector for this macro block, adjusted by the accumulator.*/
  _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,
   embs[_mbi].analysis_mv[1][_frame][0]+_accum[0],31);
  _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,
   embs[_mbi].analysis_mv[1][_frame][1]+_accum[1],31);
  /*The (0,0) vector.*/
  _mcenc->candidates[ncandidates][0]=0;
  _mcenc->candidates[ncandidates][1]=0;
  /*Use the first three vectors of set A to find our best predictor: their
     per-component median, computed with a 3-element sorting network.*/
  memcpy(a,_mcenc->candidates+1,sizeof(a));
  OC_SORT2I(a[0][0],a[1][0]);
  OC_SORT2I(a[0][1],a[1][1]);
  OC_SORT2I(a[1][0],a[2][0]);
  OC_SORT2I(a[1][1],a[2][1]);
  OC_SORT2I(a[0][0],a[1][0]);
  OC_SORT2I(a[0][1],a[1][1]);
  /*a[1] now holds the median of each component; store it in the slot
     reserved above.*/
  _mcenc->candidates[0][0]=a[1][0];
  _mcenc->candidates[0][1]=a[1][1];
  /*Fill in set B: accelerated predictors for this and adjacent macro blocks.*/
  _mcenc->setb0=ncandidates;
  /*The first time through the loop use the current macro block.*/
  /*Linear extrapolation: 2*mv[t-1]-mv[t-2], plus the accumulator, clamped.*/
  _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,
   2*embs[_mbi].analysis_mv[1][_frame][0]
   -embs[_mbi].analysis_mv[2][_frame][0]+_accum[0],31);
  _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,
   2*embs[_mbi].analysis_mv[1][_frame][1]
   -embs[_mbi].analysis_mv[2][_frame][1]+_accum[1],31);
  /*Subsequent iterations use the previous-frame neighbors.*/
  if(i>=embs[_mbi].npneighbors)break;
  nmbi=embs[_mbi].pneighbors[i];
  /*Truncate to full-pel positions.*/
  for(i=0;i<ncandidates;i++){
    _mcenc->candidates[i][0]=OC_DIV2(_mcenc->candidates[i][0]);
    _mcenc->candidates[i][1]=OC_DIV2(_mcenc->candidates[i][1]);
  _mcenc->ncandidates=ncandidates;
/*Accumulates the SAD of the four luma blocks of a macro block against a
   half-pel reference position, expressed as the average of the two full-pel
   offsets _mvoffset0 and _mvoffset1.
  _best_err-err is passed down as an early-termination threshold for each
   block's SAD computation.
  Returns the accumulated error.
  NOTE(review): the loop header over the four blocks (bi), the local
   declarations, and the return statement are elided in this view.*/
static unsigned oc_sad16_halfpel(const oc_enc_ctx *_enc,
 const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],
 int _mvoffset0,int _mvoffset1,const unsigned char *_src,
 const unsigned char *_ref,int _ystride,unsigned _best_err){
  frag_offs=_frag_buf_offs[_fragis[bi]];
  err+=oc_enc_frag_sad2_thresh(_enc,_src+frag_offs,_ref+frag_offs+_mvoffset0,
   _ref+frag_offs+_mvoffset1,_ystride,_best_err-err);
/*SATD counterpart of oc_sad16_halfpel(): accumulates the SATD of the four
   luma blocks of a macro block against a half-pel reference position given
   by the average of the two full-pel offsets.
  Returns the accumulated error.
  NOTE(review): the loop header, local declarations, and return statement
   are elided in this view.*/
static unsigned oc_satd16_halfpel(const oc_enc_ctx *_enc,
 const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],
 int _mvoffset0,int _mvoffset1,const unsigned char *_src,
 const unsigned char *_ref,int _ystride,unsigned _best_err){
  frag_offs=_frag_buf_offs[_fragis[bi]];
  err+=oc_enc_frag_satd2_thresh(_enc,_src+frag_offs,_ref+frag_offs+_mvoffset0,
   _ref+frag_offs+_mvoffset1,_ystride,_best_err-err);
/*Computes the luma SAD of a full-pel candidate MV (_dx,_dy) for a whole
   macro block, and stores the per-block errors in _block_err so they can be
   reused for the 4-MV search.
  Returns the summed error.
  NOTE(review): the loop header over the four blocks, the accumulation of
   block_err into the total, and the return are elided in this view.*/
static unsigned oc_mcenc_ysad_check_mbcandidate_fullpel(const oc_enc_ctx *_enc,
 const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],int _dx,int _dy,
 const unsigned char *_src,const unsigned char *_ref,int _ystride,
 unsigned _block_err[4]){
  /*Full-pel MV -> byte offset into the reference plane.*/
  mvoffset=_dx+_dy*_ystride;
  frag_offs=_frag_buf_offs[_fragis[bi]];
  block_err=oc_enc_frag_sad(_enc,
   _src+frag_offs,_ref+frag_offs+mvoffset,_ystride);
  _block_err[bi]=block_err;
/*Computes the luma SATD of a full-pel candidate MV (_dx,_dy) for a whole
   macro block (no per-block outputs, no early-out threshold).
  Returns the summed error.
  NOTE(review): the loop header over the four blocks and the return are
   elided in this view.*/
static int oc_mcenc_ysatd_check_mbcandidate_fullpel(const oc_enc_ctx *_enc,
 const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],int _dx,int _dy,
 const unsigned char *_src,const unsigned char *_ref,int _ystride){
  /*Full-pel MV -> byte offset into the reference plane.*/
  mvoffset=_dx+_dy*_ystride;
  frag_offs=_frag_buf_offs[_fragis[bi]];
  err+=oc_enc_frag_satd_thresh(_enc,
   _src+frag_offs,_ref+frag_offs+mvoffset,_ystride,UINT_MAX);
/*Computes the SATD of a single block (fragment) at full-pel offset
   (_dx,_dy), with the threshold disabled (UINT_MAX).
  Returns the block's error.*/
static unsigned oc_mcenc_ysatd_check_bcandidate_fullpel(const oc_enc_ctx *_enc,
 ptrdiff_t _frag_offs,int _dx,int _dy,
 const unsigned char *_src,const unsigned char *_ref,int _ystride){
  return oc_enc_frag_satd_thresh(_enc,
   _src+_frag_offs,_ref+_frag_offs+_dx+_dy*_ystride,_ystride,UINT_MAX);
/*Perform a motion vector search for this macro block against a single
   reference frame.
  As a bonus, individual block motion vectors are computed as well, as much of
   the work can be shared.
  The actual motion vector is stored in the appropriate place in the
   oc_mb_enc_info structure.
  _enc:   The encoding context.
  _accum: Drop frame/golden MV accumulators.
  _mbi:   The macro block index.
  _frame: The frame to search, either OC_FRAME_PREV or OC_FRAME_GOLD.
  NOTE(review): many lines of this function (declarations, best_err update
   logic, loop closers) are elided in this view.*/
void oc_mcenc_search_frame(oc_enc_ctx *_enc,int _accum[2],int _mbi,int _frame){
  /*Note: Traditionally this search is done using a rate-distortion objective
     function of the form D+lambda*R.
    However, xiphmont tested this and found it produced a small degradation,
     while requiring extra computation.
    This is most likely due to Theora's peculiar MV encoding scheme: MVs are
     not coded relative to a predictor, and the only truly cheap way to use a
     MV is in the LAST or LAST2 MB modes, which are not being considered here.
    Therefore if we use the MV found here, it's only because both LAST and
     LAST2 performed poorly, and therefore the MB is not likely to be uniform
     or suffer from the aperture problem.
    Furthermore we would like to re-use the MV found here for as many MBs as
     possible, so picking a slightly sub-optimal vector to save a bit or two
     may cause increased degradation in many blocks to come.
    We could artificially reduce lambda to compensate, but it's faster to just
     disable it entirely, and use D (the distortion) as the sole criterion.*/
  const ptrdiff_t *frag_buf_offs;
  const ptrdiff_t *fragis;
  const unsigned char *src;
  const unsigned char *ref;
  oc_mb_enc_info *embs;
  /*One bit per full-pel X position per row: marks vectors already checked.
    31 entries cover the biased index range [0,30].*/
  ogg_int32_t hit_cache[31];
  unsigned best_block_err[4];
  unsigned block_err[4];
  int best_block_vec[4][2];
  /*Find some candidate motion vectors.*/
  oc_mcenc_find_candidates(_enc,&mcenc,_accum,_mbi,_frame);
  /*Clear the cache of locations we've examined.*/
  memset(hit_cache,0,sizeof(hit_cache));
  /*Start with the median predictor.*/
  candx=mcenc.candidates[0][0];
  candy=mcenc.candidates[0][1];
  /*Note: '+' binds tighter than '<<', so this shifts by candx+15; both
     components are biased by 15 to form non-negative cache indices.*/
  hit_cache[candy+15]|=(ogg_int32_t)1<<candx+15;
  frag_buf_offs=_enc->state.frag_buf_offs;
  fragis=_enc->state.mb_maps[_mbi][0];
  src=_enc->state.ref_frame_data[OC_FRAME_IO];
  ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_frame]];
  ystride=_enc->state.ref_ystride[0];
  /*TODO: customize error function for speed/(quality+size) tradeoff.*/
  best_err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
   frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
  /*Individual block MVs are only tracked against the previous frame.*/
  if(_frame==OC_FRAME_PREV){
    best_block_err[bi]=block_err[bi];
    best_block_vec[bi][0]=candx;
    best_block_vec[bi][1]=candy;
  /*If this predictor fails, move on to set A.*/
  if(best_err>OC_YSAD_THRESH1){
    /*Compute the early termination threshold for set A.*/
    t2=embs[_mbi].error[_frame];
    ncs=OC_MINI(3,embs[_mbi].ncneighbors);
    for(ci=0;ci<ncs;ci++){
      t2=OC_MAXI(t2,embs[embs[_mbi].cneighbors[ci]].error[_frame]);
    /*Inflate the threshold: t2 += t2>>4 + 64.*/
    t2+=(t2>>OC_YSAD_THRESH2_SCALE_BITS)+OC_YSAD_THRESH2_OFFSET;
    /*Examine the candidates in set A.*/
    for(ci=1;ci<mcenc.setb0;ci++){
      candx=mcenc.candidates[ci][0];
      candy=mcenc.candidates[ci][1];
      /*If we've already examined this vector, then we would be using it if it
         was better than what we are using.*/
      hitbit=(ogg_int32_t)1<<candx+15;
      if(hit_cache[candy+15]&hitbit)continue;
      hit_cache[candy+15]|=hitbit;
      err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
       frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
      if(_frame==OC_FRAME_PREV){
        for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
          best_block_err[bi]=block_err[bi];
          best_block_vec[bi][0]=candx;
          best_block_vec[bi][1]=candy;
    /*Examine the candidates in set B.*/
    for(;ci<mcenc.ncandidates;ci++){
      candx=mcenc.candidates[ci][0];
      candy=mcenc.candidates[ci][1];
      hitbit=(ogg_int32_t)1<<candx+15;
      if(hit_cache[candy+15]&hitbit)continue;
      hit_cache[candy+15]|=hitbit;
      err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
       frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
      if(_frame==OC_FRAME_PREV){
        for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
          best_block_err[bi]=block_err[bi];
          best_block_vec[bi][0]=candx;
          best_block_vec[bi][1]=candy;
    /*Use the same threshold for set B as in set A.*/
    /*Square pattern search.*/
    /*Compose the bit flags for boundary conditions, so the pattern never
       steps outside the legal full-pel MV range.*/
    b=OC_DIV16(-best_vec[0]+1)|OC_DIV16(best_vec[0]+1)<<1|
     OC_DIV16(-best_vec[1]+1)<<2|OC_DIV16(best_vec[1]+1)<<3;
    nsites=OC_SQUARE_NSITES[b];
    for(sitei=0;sitei<nsites;sitei++){
      site=OC_SQUARE_SITES[b][sitei];
      candx=best_vec[0]+OC_SQUARE_DX[site];
      candy=best_vec[1]+OC_SQUARE_DY[site];
      hitbit=(ogg_int32_t)1<<candx+15;
      if(hit_cache[candy+15]&hitbit)continue;
      hit_cache[candy+15]|=hitbit;
      err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
       frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
      if(_frame==OC_FRAME_PREV){
        for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
          best_block_err[bi]=block_err[bi];
          best_block_vec[bi][0]=candx;
          best_block_vec[bi][1]=candy;
    /*Site 4 is the center: no neighbor improved on it, so stop searching.*/
    if(best_site==4)break;
    best_vec[0]+=OC_SQUARE_DX[best_site];
    best_vec[1]+=OC_SQUARE_DY[best_site];
  /*Final 4-MV search.*/
  /*Simply use 1/4 of the macro block set A and B threshold as the
     individual block threshold.*/
  if(_frame==OC_FRAME_PREV){
    if(best_block_err[bi]>t2){
      /*Square pattern search.
        We do this in a slightly interesting manner.
        We continue to check the SAD of all four blocks in the macro block.
        This gives us two things:
         1) We can continue to use the hit_cache to avoid duplicate checks.
            Otherwise we could continue to read it, but not write to it
             without saving and restoring it for each block.
            Note that we could still eliminate a large number of duplicate
             checks by taking into account the site we came from when
             choosing the site list.
            We can still do that to avoid extra hit_cache queries, and it
             might even be a speed win.
         2) It gives us a slightly better chance of escaping local minima.
            We would not be here if we weren't doing a fairly bad job in
             finding a good vector, and checking these vectors can save us
             from 100 to several thousand points off our SAD.
        TODO: Is this a good idea?
        It needs more testing.*/
      bestx=best_block_vec[bi][0];
      besty=best_block_vec[bi][1];
      /*Compose the bit flags for boundary conditions.*/
      b=OC_DIV16(-bestx+1)|OC_DIV16(bestx+1)<<1|
       OC_DIV16(-besty+1)<<2|OC_DIV16(besty+1)<<3;
      nsites=OC_SQUARE_NSITES[b];
      for(sitei=0;sitei<nsites;sitei++){
        site=OC_SQUARE_SITES[b][sitei];
        candx=bestx+OC_SQUARE_DX[site];
        candy=besty+OC_SQUARE_DY[site];
        hitbit=(ogg_int32_t)1<<candx+15;
        if(hit_cache[candy+15]&hitbit)continue;
        hit_cache[candy+15]|=hitbit;
        err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
         frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
        /*Update every block's best vector, not just block bi (see the long
           comment above).*/
        for(bj=0;bj<4;bj++)if(block_err[bj]<best_block_err[bj]){
          best_block_err[bj]=block_err[bj];
          best_block_vec[bj][0]=candx;
          best_block_vec[bj][1]=candy;
      /*If block bi's vector did not change, its local search has converged.*/
      if(best_block_vec[bi][0]==bestx&&best_block_vec[bi][1]==besty){
  embs[_mbi].error[_frame]=(ogg_uint16_t)best_err;
  embs[_mbi].satd[_frame]=oc_mcenc_ysatd_check_mbcandidate_fullpel(_enc,
   frag_buf_offs,fragis,candx,candy,src,ref,ystride);
  /*Store the winning vector in half-pel units (full-pel<<1).*/
  embs[_mbi].analysis_mv[0][_frame][0]=(signed char)(candx<<1);
  embs[_mbi].analysis_mv[0][_frame][1]=(signed char)(candy<<1);
  if(_frame==OC_FRAME_PREV){
    candx=best_block_vec[bi][0];
    candy=best_block_vec[bi][1];
    embs[_mbi].block_satd[bi]=oc_mcenc_ysatd_check_bcandidate_fullpel(_enc,
     frag_buf_offs[fragis[bi]],candx,candy,src,ref,ystride);
    embs[_mbi].block_mv[bi][0]=(signed char)(candx<<1);
    embs[_mbi].block_mv[bi][1]=(signed char)(candy<<1);
/*Runs the motion vector searches for a macro block against both the previous
   frame and the golden frame, handling the MV accumulator bookkeeping for
   dropped frames and the golden frame's absolute-offset convention.
  _enc: The encoding context.
  _mbi: The macro block index.
  NOTE(review): declarations and some closing braces are elided in this
   view.*/
void oc_mcenc_search(oc_enc_ctx *_enc,int _mbi){
  mvs=_enc->mb_info[_mbi].analysis_mv;
  /*If the previous frame was dropped, its MV carries over as the accumulated
     offset for the PREV search.*/
  if(_enc->prevframe_dropped){
    accum_p[0]=mvs[0][OC_FRAME_PREV][0];
    accum_p[1]=mvs[0][OC_FRAME_PREV][1];
  else accum_p[1]=accum_p[0]=0;
  accum_g[0]=mvs[2][OC_FRAME_GOLD][0];
  accum_g[1]=mvs[2][OC_FRAME_GOLD][1];
  mvs[0][OC_FRAME_PREV][0]-=mvs[2][OC_FRAME_PREV][0];
  mvs[0][OC_FRAME_PREV][1]-=mvs[2][OC_FRAME_PREV][1];
  /*Move the motion vector predictors back a frame.*/
  memmove(mvs+1,mvs,2*sizeof(*mvs));
  /*Search the last frame.*/
  oc_mcenc_search_frame(_enc,accum_p,_mbi,OC_FRAME_PREV);
  mvs[2][OC_FRAME_PREV][0]=accum_p[0];
  mvs[2][OC_FRAME_PREV][1]=accum_p[1];
  /*GOLDEN MVs are different from PREV MVs in that they're each absolute
     offsets from some frame in the past rather than relative offsets from
     the frame before.
    For predictor calculation to make sense, we need them to be in the same
     form as the PREV MVs.*/
  mvs[1][OC_FRAME_GOLD][0]-=mvs[2][OC_FRAME_GOLD][0];
  mvs[1][OC_FRAME_GOLD][1]-=mvs[2][OC_FRAME_GOLD][1];
  mvs[2][OC_FRAME_GOLD][0]-=accum_g[0];
  mvs[2][OC_FRAME_GOLD][1]-=accum_g[1];
  /*Search the golden frame.*/
  oc_mcenc_search_frame(_enc,accum_g,_mbi,OC_FRAME_GOLD);
  /*Put GOLDEN MVs back into absolute offset form.
    The newest MV is already an absolute offset.*/
  mvs[2][OC_FRAME_GOLD][0]+=accum_g[0];
  mvs[2][OC_FRAME_GOLD][1]+=accum_g[1];
  mvs[1][OC_FRAME_GOLD][0]+=mvs[2][OC_FRAME_GOLD][0];
  mvs[1][OC_FRAME_GOLD][1]+=mvs[2][OC_FRAME_GOLD][1];
533 static int oc_mcenc_ysad_halfpel_mbrefine(const oc_enc_ctx *_enc,int _mbi,
534 int _vec[2],int _best_err,int _frame){
535 const unsigned char *src;
536 const unsigned char *ref;
537 const ptrdiff_t *frag_buf_offs;
538 const ptrdiff_t *fragis;
545 src=_enc->state.ref_frame_data[OC_FRAME_IO];
546 ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_framei]];
547 frag_buf_offs=_enc->state.frag_buf_offs;
548 fragis=_enc->state.mb_maps[_mbi][0];
549 ystride=_enc->state.ref_ystride[0];
550 mvoffset_base=_vec[0]+_vec[1]*ystride;
551 offset_y[0]=offset_y[1]=offset_y[2]=-ystride;
552 offset_y[3]=offset_y[5]=0;
553 offset_y[6]=offset_y[7]=offset_y[8]=ystride;
555 for(sitei=0;sitei<8;sitei++){
563 site=OC_SQUARE_SITES[0][sitei];
564 dx=OC_SQUARE_DX[site];
565 dy=OC_SQUARE_DY[site];
566 /*The following code SHOULD be equivalent to
567 oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1,
568 (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0);
569 However, it should also be much faster, as it involves no multiplies and
570 doesn't have to handle chroma vectors.*/
571 xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx);
572 ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
573 mvoffset0=mvoffset_base+(dx&xmask)+(offset_y[site]&ymask);
574 mvoffset1=mvoffset_base+(dx&~xmask)+(offset_y[site]&~ymask);
575 err=oc_sad16_halfpel(_enc,frag_buf_offs,fragis,
576 mvoffset0,mvoffset1,src,ref,ystride,_best_err);
582 _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site];
583 _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site];
/*SATD counterpart of oc_mcenc_ysad_halfpel_mbrefine(): refines a macro
   block's motion vector from full-pel to half-pel precision over the 8
   surrounding half-pel sites.
  On return _vec has been converted in place to half-pel units.
  Returns the best error found.
  NOTE(review): local declarations, the best_site/err bookkeeping, and the
   return statement are elided in this view.*/
static unsigned oc_mcenc_ysatd_halfpel_mbrefine(const oc_enc_ctx *_enc,
 int _mbi,int _vec[2],unsigned _best_err,int _frame){
  const unsigned char *src;
  const unsigned char *ref;
  const ptrdiff_t *frag_buf_offs;
  const ptrdiff_t *fragis;
  src=_enc->state.ref_frame_data[OC_FRAME_IO];
  ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_frame]];
  frag_buf_offs=_enc->state.frag_buf_offs;
  fragis=_enc->state.mb_maps[_mbi][0];
  ystride=_enc->state.ref_ystride[0];
  /*Byte offset of the full-pel starting position.*/
  mvoffset_base=_vec[0]+_vec[1]*ystride;
  /*Per-site row offsets; offset_y[4] (the center) is left unset --
     presumably site 4 never appears in OC_SQUARE_SITES[0]; verify.*/
  offset_y[0]=offset_y[1]=offset_y[2]=-ystride;
  offset_y[3]=offset_y[5]=0;
  offset_y[6]=offset_y[7]=offset_y[8]=ystride;
  for(sitei=0;sitei<8;sitei++){
    site=OC_SQUARE_SITES[0][sitei];
    dx=OC_SQUARE_DX[site];
    dy=OC_SQUARE_DY[site];
    /*The following code SHOULD be equivalent to
       oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1,
        (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0);
      However, it should also be much faster, as it involves no multiplies and
       doesn't have to handle chroma vectors.*/
    xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx);
    ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
    mvoffset0=mvoffset_base+(dx&xmask)+(offset_y[site]&ymask);
    mvoffset1=mvoffset_base+(dx&~xmask)+(offset_y[site]&~ymask);
    err=oc_satd16_halfpel(_enc,frag_buf_offs,fragis,
     mvoffset0,mvoffset1,src,ref,ystride,_best_err);
  /*Convert the refined vector to half-pel units.*/
  _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site];
  _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site];
/*Refines the whole-macro-block motion vector for the given frame from
   full-pel to half-pel precision using SATD, updating the stored vector and
   error in place.
  _enc:   The encoding context.
  _mbi:   The macro block index.
  _frame: The frame to refine against (OC_FRAME_PREV or OC_FRAME_GOLD).
  NOTE(review): local declarations (vec, embs assignment) are elided in this
   view.*/
void oc_mcenc_refine1mv(oc_enc_ctx *_enc,int _mbi,int _frame){
  oc_mb_enc_info *embs;
  /*The stored MV is in half-pel units; truncate to full-pel for the search.*/
  vec[0]=OC_DIV2(embs[_mbi].analysis_mv[0][_frame][0]);
  vec[1]=OC_DIV2(embs[_mbi].analysis_mv[0][_frame][1]);
  embs[_mbi].satd[_frame]=oc_mcenc_ysatd_halfpel_mbrefine(_enc,
   _mbi,vec,embs[_mbi].satd[_frame],_frame);
  /*The refinement left vec in half-pel units; store it back directly.*/
  embs[_mbi].analysis_mv[0][_frame][0]=(signed char)vec[0];
  embs[_mbi].analysis_mv[0][_frame][1]=(signed char)vec[1];
655 static int oc_mcenc_ysad_halfpel_brefine(const oc_enc_ctx *_enc,
656 int _vec[2],const unsigned char *_src,const unsigned char *_ref,int _ystride,
657 int _offset_y[9],unsigned _best_err){
661 mvoffset_base=_vec[0]+_vec[1]*_ystride;
663 for(sitei=0;sitei<8;sitei++){
672 site=OC_SQUARE_SITES[0][sitei];
673 dx=OC_SQUARE_DX[site];
674 dy=OC_SQUARE_DY[site];
675 /*The following code SHOULD be equivalent to
676 oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1,
677 (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0);
678 However, it should also be much faster, as it involves no multiplies and
679 doesn't have to handle chroma vectors.*/
680 xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx);
681 ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
682 mvoffset0=mvoffset_base+(dx&xmask)+(_offset_y[site]&ymask);
683 mvoffset1=mvoffset_base+(dx&~xmask)+(_offset_y[site]&~ymask);
684 err=oc_enc_frag_sad2_thresh(_enc,_src,
685 _ref+mvoffset0,_ref+mvoffset1,ystride,_best_err);
691 _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site];
692 _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site];
/*SATD counterpart of oc_mcenc_ysad_halfpel_brefine(): refines a single
   block's motion vector from full-pel to half-pel precision over the 8
   surrounding half-pel sites.
  On return _vec has been converted in place to half-pel units.
  Returns the best error found.
  NOTE(review): local declarations, the best_site/err bookkeeping, and the
   return statement are elided in this view.*/
static unsigned oc_mcenc_ysatd_halfpel_brefine(const oc_enc_ctx *_enc,
 int _vec[2],const unsigned char *_src,const unsigned char *_ref,int _ystride,
 int _offset_y[9],unsigned _best_err){
  /*Byte offset of the full-pel starting position.*/
  mvoffset_base=_vec[0]+_vec[1]*_ystride;
  for(sitei=0;sitei<8;sitei++){
    site=OC_SQUARE_SITES[0][sitei];
    dx=OC_SQUARE_DX[site];
    dy=OC_SQUARE_DY[site];
    /*The following code SHOULD be equivalent to
       oc_state_get_mv_offsets(&_enc->state,&mvoffsets,0,
        (_vec[0]<<1)+dx,(_vec[1]<<1)+dy);
      However, it should also be much faster, as it involves no multiplies and
       doesn't have to handle chroma vectors.*/
    xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx);
    ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
    mvoffset0=mvoffset_base+(dx&xmask)+(_offset_y[site]&ymask);
    mvoffset1=mvoffset_base+(dx&~xmask)+(_offset_y[site]&~ymask);
    err=oc_enc_frag_satd2_thresh(_enc,_src,
     _ref+mvoffset0,_ref+mvoffset1,_ystride,_best_err);
  /*Convert the refined vector to half-pel units.*/
  _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site];
  _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site];
/*Refines the four individual block motion vectors of a macro block from
   full-pel to half-pel precision against the previous frame, using SATD,
   storing the refined vectors in ref_mv and the errors in block_satd.
  _enc: The encoding context.
  _mbi: The macro block index.
  NOTE(review): local declarations, the loop header over the four blocks
   (bi), and the function's closing lines are elided in this view (the
   definition also runs past the end of this excerpt).*/
void oc_mcenc_refine4mv(oc_enc_ctx *_enc,int _mbi){
  oc_mb_enc_info *embs;
  const ptrdiff_t *frag_buf_offs;
  const ptrdiff_t *fragis;
  const unsigned char *src;
  const unsigned char *ref;
  ystride=_enc->state.ref_ystride[0];
  frag_buf_offs=_enc->state.frag_buf_offs;
  fragis=_enc->state.mb_maps[_mbi][0];
  src=_enc->state.ref_frame_data[OC_FRAME_IO];
  /*4-MV refinement always runs against the previous frame.*/
  ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]];
  /*Per-site row offsets shared by all four block refinements; offset_y[4]
     (the center site) is left unset -- presumably never indexed; verify
     against the OC_SQUARE_SITES table.*/
  offset_y[0]=offset_y[1]=offset_y[2]=-ystride;
  offset_y[3]=offset_y[5]=0;
  offset_y[6]=offset_y[7]=offset_y[8]=ystride;
  frag_offs=frag_buf_offs[fragis[bi]];
  /*The stored block MV is in half-pel units; truncate to full-pel.*/
  vec[0]=OC_DIV2(embs[_mbi].block_mv[bi][0]);
  vec[1]=OC_DIV2(embs[_mbi].block_mv[bi][1]);
  embs[_mbi].block_satd[bi]=oc_mcenc_ysatd_halfpel_brefine(_enc,vec,
   src+frag_offs,ref+frag_offs,ystride,offset_y,embs[_mbi].block_satd[bi]);
  /*vec is now in half-pel units; store it back directly.*/
  embs[_mbi].ref_mv[bi][0]=(signed char)vec[0];
  embs[_mbi].ref_mv[bi][1]=(signed char)vec[1];