MODE_INFO size reduction
[profile/ivi/libvpx.git] / vp8 / decoder / threading.c
1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10
11
12 #if !defined(WIN32) && CONFIG_OS_SUPPORT == 1
13 # include <unistd.h>
14 #endif
15 #include "onyxd_int.h"
16 #include "vpx_mem/vpx_mem.h"
17 #include "vp8/common/threading.h"
18
19 #include "vp8/common/loopfilter.h"
20 #include "vp8/common/extend.h"
21 #include "vpx_ports/vpx_timer.h"
22 #include "detokenize.h"
23 #include "vp8/common/reconinter.h"
24 #include "reconintra_mt.h"
25
26 extern void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);
27 extern void clamp_mvs(MACROBLOCKD *xd);
28 extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
29
30 #if CONFIG_RUNTIME_CPU_DETECT
31 #define RTCD_VTABLE(x) (&(pbi)->common.rtcd.x)
32 #else
33 #define RTCD_VTABLE(x) NULL
34 #endif
35
36 static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
37 {
38     VP8_COMMON *const pc = & pbi->common;
39     int i, j;
40
41     for (i = 0; i < count; i++)
42     {
43         MACROBLOCKD *mbd = &mbrd[i].mbd;
44 #if CONFIG_RUNTIME_CPU_DETECT
45         mbd->rtcd = xd->rtcd;
46 #endif
47         mbd->subpixel_predict        = xd->subpixel_predict;
48         mbd->subpixel_predict8x4     = xd->subpixel_predict8x4;
49         mbd->subpixel_predict8x8     = xd->subpixel_predict8x8;
50         mbd->subpixel_predict16x16   = xd->subpixel_predict16x16;
51
52         mbd->mode_info_context = pc->mi   + pc->mode_info_stride * (i + 1);
53         mbd->mode_info_stride  = pc->mode_info_stride;
54
55         mbd->frame_type = pc->frame_type;
56         mbd->frames_since_golden      = pc->frames_since_golden;
57         mbd->frames_till_alt_ref_frame  = pc->frames_till_alt_ref_frame;
58
59         mbd->pre = pc->yv12_fb[pc->lst_fb_idx];
60         mbd->dst = pc->yv12_fb[pc->new_fb_idx];
61
62         vp8_setup_block_dptrs(mbd);
63         vp8_build_block_doffsets(mbd);
64         mbd->segmentation_enabled    = xd->segmentation_enabled;
65         mbd->mb_segement_abs_delta     = xd->mb_segement_abs_delta;
66         vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
67
68         /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/
69         vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
70         /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/
71         vpx_memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
72         /*unsigned char mode_ref_lf_delta_enabled;
73         unsigned char mode_ref_lf_delta_update;*/
74         mbd->mode_ref_lf_delta_enabled    = xd->mode_ref_lf_delta_enabled;
75         mbd->mode_ref_lf_delta_update    = xd->mode_ref_lf_delta_update;
76
77         mbd->current_bc = &pbi->bc2;
78
79         for (j = 0; j < 25; j++)
80         {
81             mbd->block[j].dequant = xd->block[j].dequant;
82         }
83     }
84
85     for (i=0; i< pc->mb_rows; i++)
86         pbi->mt_current_mb_col[i]=-1;
87 }
88
89
/* Decode one macroblock: entropy-decode its coefficients (unless the
 * bitstream flagged it as skipped), form the intra or inter prediction,
 * then dequantize and inverse-transform the residual into the
 * reconstruction buffer.  mb_row/mb_col are passed through to the
 * vp8mt_* intra predictors, which read neighbor pixels from buffers
 * shared between decoding threads.
 */
static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col)
{
    int eobtotal = 0;
    int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs;

    if (xd->mode_info_context->mbmi.mb_skip_coeff)
    {
        vp8_reset_mb_tokens_context(xd);
    }
    else
    {
        eobtotal = vp8_decode_mb_tokens(pbi, xd);
    }

    /* Perform temporary clamping of the MV to be used for prediction */
    if (do_clamp)
    {
        clamp_mvs(xd);
    }

    /* B_PRED/SPLITMV always take the full reconstruction path below,
     * regardless of eobtotal. */
    eobtotal |= (xd->mode_info_context->mbmi.mode == B_PRED ||
                  xd->mode_info_context->mbmi.mode == SPLITMV);
    if (!eobtotal)
    {
        /* Special case:  Force the loopfilter to skip when eobtotal and
         * mb_skip_coeff are zero.
         * */
        xd->mode_info_context->mbmi.mb_skip_coeff = 1;

        /* No residual: the prediction alone is the reconstruction. */
        /*mt_skip_recon_mb(pbi, xd, mb_row, mb_col);*/
        if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
        {
            vp8mt_build_intra_predictors_mbuv_s(pbi, xd, mb_row, mb_col);
            vp8mt_build_intra_predictors_mby_s(pbi, xd, mb_row, mb_col);
        }
        else
        {
            vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer,
                                               xd->dst.u_buffer, xd->dst.v_buffer,
                                               xd->dst.y_stride, xd->dst.uv_stride);
        }
        return;
    }

    /* Per-segment quantizers may differ from the frame-level ones. */
    if (xd->segmentation_enabled)
        mb_init_dequantizer(pbi, xd);

    /* do prediction */
    if (xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    {
        vp8mt_build_intra_predictors_mbuv(pbi, xd, mb_row, mb_col);

        if (xd->mode_info_context->mbmi.mode != B_PRED)
        {
            vp8mt_build_intra_predictors_mby(pbi, xd, mb_row, mb_col);
        } else {
            /* 4x4 intra modes need the above-row pixels copied down
             * before per-block prediction. */
            vp8mt_intra_prediction_down_copy(pbi, xd, mb_row, mb_col);
        }
    }
    else
    {
        vp8_build_inter_predictors_mb(xd);
    }

    /* dequantization and idct */
    if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
    {
        /* Block 24 holds the second-order (Y2/WHT) DC coefficients. */
        BLOCKD *b = &xd->block[24];
        DEQUANT_INVOKE(&pbi->dequant, block)(b);

        /* do 2nd order transform on the dc block */
        if (xd->eobs[24] > 1)
        {
            IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
            /* Zero all 16 qcoeffs via 8 int-sized stores. */
            ((int *)b->qcoeff)[0] = 0;
            ((int *)b->qcoeff)[1] = 0;
            ((int *)b->qcoeff)[2] = 0;
            ((int *)b->qcoeff)[3] = 0;
            ((int *)b->qcoeff)[4] = 0;
            ((int *)b->qcoeff)[5] = 0;
            ((int *)b->qcoeff)[6] = 0;
            ((int *)b->qcoeff)[7] = 0;
        }
        else
        {
            /* Only the DC term is non-zero: cheaper 1-coefficient walsh. */
            IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff);
            ((int *)b->qcoeff)[0] = 0;
        }

        /* Add dequantized luma residual (with the Y2 DC terms) to the
         * prediction. */
        DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block)
                        (xd->qcoeff, xd->block[0].dequant,
                         xd->predictor, xd->dst.y_buffer,
                         xd->dst.y_stride, xd->eobs, xd->block[24].diff);
    }
    else if ((xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && xd->mode_info_context->mbmi.mode == B_PRED)
    {
        /* Intra 4x4: predict and reconstruct each sub-block in raster
         * order, since each block's prediction depends on its already-
         * reconstructed neighbors. */
        for (i = 0; i < 16; i++)
        {
            BLOCKD *b = &xd->block[i];
            vp8mt_predict_intra4x4(pbi, xd, b->bmi.mode, b->predictor, mb_row, mb_col, i);
            if (xd->eobs[i] > 1)
            {
                DEQUANT_INVOKE(&pbi->dequant, idct_add)
                    (b->qcoeff, b->dequant,  b->predictor,
                    *(b->base_dst) + b->dst, 16, b->dst_stride);
            }
            else
            {
                /* DC-only residual: scalar add, then clear the coeff. */
                IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
                    (b->qcoeff[0] * b->dequant[0], b->predictor,
                    *(b->base_dst) + b->dst, 16, b->dst_stride);
                ((int *)b->qcoeff)[0] = 0;
            }
        }
    }
    else
    {
        /* Inter SPLITMV (or B_PRED inter path): plain per-block idct-add. */
        DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
                        (xd->qcoeff, xd->block[0].dequant,
                         xd->predictor, xd->dst.y_buffer,
                         xd->dst.y_stride, xd->eobs);
    }

    /* Chroma residual: blocks 16-23, qcoeff/predictor offset past the
     * 16x16 luma area. */
    DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
                    (xd->qcoeff+16*16, xd->block[16].dequant,
                     xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer,
                     xd->dst.uv_stride, xd->eobs+16);
}
218
219
220 static THREAD_FUNCTION thread_decoding_proc(void *p_data)
221 {
222     int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
223     VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
224     MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
225     ENTROPY_CONTEXT_PLANES mb_row_left_context;
226
227     while (1)
228     {
229         if (pbi->b_multithreaded_rd == 0)
230             break;
231
232         /*if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)*/
233         if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0)
234         {
235             if (pbi->b_multithreaded_rd == 0)
236                 break;
237             else
238             {
239                 VP8_COMMON *pc = &pbi->common;
240                 MACROBLOCKD *xd = &mbrd->mbd;
241
242                 int mb_row;
243                 int num_part = 1 << pbi->common.multi_token_partition;
244                 volatile int *last_row_current_mb_col;
245                 int nsync = pbi->sync_range;
246
247                 for (mb_row = ithread+1; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
248                 {
249                     int i;
250                     int recon_yoffset, recon_uvoffset;
251                     int mb_col;
252                     int ref_fb_idx = pc->lst_fb_idx;
253                     int dst_fb_idx = pc->new_fb_idx;
254                     int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
255                     int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
256
257                     int filter_level;
258                     loop_filter_info *lfi = pc->lf_info;
259                     int alt_flt_enabled = xd->segmentation_enabled;
260                     int Segment;
261
262                     pbi->mb_row_di[ithread].mb_row = mb_row;
263                     pbi->mb_row_di[ithread].mbd.current_bc =  &pbi->mbc[mb_row%num_part];
264
265                     last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1];
266
267                     recon_yoffset = mb_row * recon_y_stride * 16;
268                     recon_uvoffset = mb_row * recon_uv_stride * 8;
269                     /* reset above block coeffs */
270
271                     xd->above_context = pc->above_context;
272                     xd->left_context = &mb_row_left_context;
273                     vpx_memset(&mb_row_left_context, 0, sizeof(mb_row_left_context));
274                     xd->up_available = (mb_row != 0);
275
276                     xd->mb_to_top_edge = -((mb_row * 16)) << 3;
277                     xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
278
279                     for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
280                     {
281                         if ((mb_col & (nsync-1)) == 0)
282                         {
283                             while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
284                             {
285                                 x86_pause_hint();
286                                 thread_sleep(0);
287                             }
288                         }
289
290                         update_blockd_bmi(xd);
291
292                         /* Distance of Mb to the various image edges.
293                          * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
294                          */
295                         xd->mb_to_left_edge = -((mb_col * 16) << 3);
296                         xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
297
298                         xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
299                         xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
300                         xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
301
302                         xd->left_available = (mb_col != 0);
303
304                         /* Select the appropriate reference frame for this MB */
305                         if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
306                             ref_fb_idx = pc->lst_fb_idx;
307                         else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
308                             ref_fb_idx = pc->gld_fb_idx;
309                         else
310                             ref_fb_idx = pc->alt_fb_idx;
311
312                         xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
313                         xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
314                         xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
315
316                         vp8_build_uvmvs(xd, pc->full_pixel);
317                         decode_macroblock(pbi, xd, mb_row, mb_col);
318
319                         if (pbi->common.filter_level)
320                         {
321                             int skip_lf;
322                             if( mb_row != pc->mb_rows-1 )
323                             {
324                                 /* Save decoded MB last row data for next-row decoding */
325                                 vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
326                                 vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
327                                 vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
328                             }
329
330                             /* save left_col for next MB decoding */
331                             if(mb_col != pc->mb_cols-1)
332                             {
333                                 MODE_INFO *next = xd->mode_info_context +1;
334
335                                 if (xd->frame_type == KEY_FRAME  ||  next->mbmi.ref_frame == INTRA_FRAME)
336                                 {
337                                     for (i = 0; i < 16; i++)
338                                         pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15];
339                                     for (i = 0; i < 8; i++)
340                                     {
341                                         pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7];
342                                         pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7];
343                                     }
344                                 }
345                             }
346
347                             /* update loopfilter info */
348                             Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
349                             skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
350                                             xd->mode_info_context->mbmi.mode != SPLITMV &&
351                                             xd->mode_info_context->mbmi.mb_skip_coeff);
352
353                             filter_level = pbi->mt_baseline_filter_level[Segment];
354                             /* Distance of Mb to the various image edges.
355                              * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
356                              * Apply any context driven MB level adjustment
357                              */
358                             filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
359
360                             /* loopfilter on this macroblock. */
361                             if (filter_level)
362                             {
363                                 if (mb_col > 0)
364                                     pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
365
366                                 if (!skip_lf)
367                                     pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
368
369                                 /* don't apply across umv border */
370                                 if (mb_row > 0)
371                                     pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
372
373                                 if (!skip_lf)
374                                     pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
375                             }
376                         }
377
378                         recon_yoffset += 16;
379                         recon_uvoffset += 8;
380
381                         ++xd->mode_info_context;  /* next mb */
382
383                         xd->above_context++;
384
385                         /*pbi->mb_row_di[ithread].current_mb_col = mb_col;*/
386                         pbi->mt_current_mb_col[mb_row] = mb_col;
387                     }
388
389                     /* adjust to the next row of mbs */
390                     if (pbi->common.filter_level)
391                     {
392                         if(mb_row != pc->mb_rows-1)
393                         {
394                             int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
395                             int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1);
396
397                             for (i = 0; i < 4; i++)
398                             {
399                                 pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1];
400                                 pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1];
401                                 pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1];
402                             }
403                         }
404                     } else
405                         vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
406
407                     ++xd->mode_info_context;      /* skip prediction column */
408
409                     /* since we have multithread */
410                     xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
411                 }
412             }
413         }
414         /*  add this to each frame */
415         if ((mbrd->mb_row == pbi->common.mb_rows-1) || ((mbrd->mb_row == pbi->common.mb_rows-2) && (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1))
416         {
417             /*SetEvent(pbi->h_event_end_decoding);*/
418             sem_post(&pbi->h_event_end_decoding);
419         }
420     }
421
422     return 0 ;
423 }
424
425
/* Create the worker threads for multi-threaded decoding, if more than
 * one core is available.  Thread count is capped at 8 (the maximum
 * number of token partitions) and at the detected core count; one core
 * is reserved for the main thread.  Sets b_multithreaded_rd and
 * allocated_decoding_thread_count on success; leaves both zero when
 * running single-threaded.
 */
void vp8_decoder_create_threads(VP8D_COMP *pbi)
{
    int core_count = 0;
    int ithread;

    pbi->b_multithreaded_rd = 0;
    pbi->allocated_decoding_thread_count = 0;

    /* limit decoding threads to the max number of token partitions */
    core_count = (pbi->max_threads > 8) ? 8 : pbi->max_threads;

    /* limit decoding threads to the available cores */
    if (core_count > pbi->common.processor_core_count)
        core_count = pbi->common.processor_core_count;

    if (core_count > 1)
    {
        pbi->b_multithreaded_rd = 1;
        pbi->decoding_thread_count = core_count - 1;

        /* CHECK_MEM_ERROR aborts decode on allocation failure. */
        CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
        CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
        /* 32-byte alignment for the per-thread MACROBLOCKD state. */
        CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count));
        vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count);
        CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count));

        for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)
        {
            /* NOTE(review): sem_init and pthread_create return values are
             * not checked here; a failed creation would leave a dead
             * thread slot.  TODO: confirm whether callers can tolerate
             * a reduced thread count and handle failure accordingly. */
            sem_init(&pbi->h_event_start_decoding[ithread], 0, 0);

            pbi->de_thread_data[ithread].ithread  = ithread;
            pbi->de_thread_data[ithread].ptr1     = (void *)pbi;
            pbi->de_thread_data[ithread].ptr2     = (void *) &pbi->mb_row_di[ithread];

            pthread_create(&pbi->h_decoding_thread[ithread], 0, thread_decoding_proc, (&pbi->de_thread_data[ithread]));
        }

        /* Signalled by the worker that finishes the frame's last row. */
        sem_init(&pbi->h_event_end_decoding, 0, 0);

        pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
    }
}
468
469
470 void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
471 {
472     int i;
473
474     if (pbi->b_multithreaded_rd)
475     {
476             vpx_free(pbi->mt_current_mb_col);
477             pbi->mt_current_mb_col = NULL ;
478
479         /* Free above_row buffers. */
480         if (pbi->mt_yabove_row)
481         {
482             for (i=0; i< mb_rows; i++)
483             {
484                     vpx_free(pbi->mt_yabove_row[i]);
485                     pbi->mt_yabove_row[i] = NULL ;
486             }
487             vpx_free(pbi->mt_yabove_row);
488             pbi->mt_yabove_row = NULL ;
489         }
490
491         if (pbi->mt_uabove_row)
492         {
493             for (i=0; i< mb_rows; i++)
494             {
495                     vpx_free(pbi->mt_uabove_row[i]);
496                     pbi->mt_uabove_row[i] = NULL ;
497             }
498             vpx_free(pbi->mt_uabove_row);
499             pbi->mt_uabove_row = NULL ;
500         }
501
502         if (pbi->mt_vabove_row)
503         {
504             for (i=0; i< mb_rows; i++)
505             {
506                     vpx_free(pbi->mt_vabove_row[i]);
507                     pbi->mt_vabove_row[i] = NULL ;
508             }
509             vpx_free(pbi->mt_vabove_row);
510             pbi->mt_vabove_row = NULL ;
511         }
512
513         /* Free left_col buffers. */
514         if (pbi->mt_yleft_col)
515         {
516             for (i=0; i< mb_rows; i++)
517             {
518                     vpx_free(pbi->mt_yleft_col[i]);
519                     pbi->mt_yleft_col[i] = NULL ;
520             }
521             vpx_free(pbi->mt_yleft_col);
522             pbi->mt_yleft_col = NULL ;
523         }
524
525         if (pbi->mt_uleft_col)
526         {
527             for (i=0; i< mb_rows; i++)
528             {
529                     vpx_free(pbi->mt_uleft_col[i]);
530                     pbi->mt_uleft_col[i] = NULL ;
531             }
532             vpx_free(pbi->mt_uleft_col);
533             pbi->mt_uleft_col = NULL ;
534         }
535
536         if (pbi->mt_vleft_col)
537         {
538             for (i=0; i< mb_rows; i++)
539             {
540                     vpx_free(pbi->mt_vleft_col[i]);
541                     pbi->mt_vleft_col[i] = NULL ;
542             }
543             vpx_free(pbi->mt_vleft_col);
544             pbi->mt_vleft_col = NULL ;
545         }
546     }
547 }
548
549
550 void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
551 {
552     VP8_COMMON *const pc = & pbi->common;
553     int i;
554     int uv_width;
555
556     if (pbi->b_multithreaded_rd)
557     {
558         vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);
559
560         /* our internal buffers are always multiples of 16 */
561         if ((width & 0xf) != 0)
562             width += 16 - (width & 0xf);
563
564         if (width < 640) pbi->sync_range = 1;
565         else if (width <= 1280) pbi->sync_range = 8;
566         else if (width <= 2560) pbi->sync_range =16;
567         else pbi->sync_range = 32;
568
569         uv_width = width >>1;
570
571         /* Allocate an int for each mb row. */
572         CHECK_MEM_ERROR(pbi->mt_current_mb_col, vpx_malloc(sizeof(int) * pc->mb_rows));
573
574         /* Allocate memory for above_row buffers. */
575         CHECK_MEM_ERROR(pbi->mt_yabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
576         for (i=0; i< pc->mb_rows; i++)
577             CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_calloc(sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1)), 1));
578
579         CHECK_MEM_ERROR(pbi->mt_uabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
580         for (i=0; i< pc->mb_rows; i++)
581             CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));
582
583         CHECK_MEM_ERROR(pbi->mt_vabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
584         for (i=0; i< pc->mb_rows; i++)
585             CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));
586
587         /* Allocate memory for left_col buffers. */
588         CHECK_MEM_ERROR(pbi->mt_yleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
589         for (i=0; i< pc->mb_rows; i++)
590             CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1));
591
592         CHECK_MEM_ERROR(pbi->mt_uleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
593         for (i=0; i< pc->mb_rows; i++)
594             CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
595
596         CHECK_MEM_ERROR(pbi->mt_vleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
597         for (i=0; i< pc->mb_rows; i++)
598             CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
599     }
600 }
601
602
603 void vp8_decoder_remove_threads(VP8D_COMP *pbi)
604 {
605     /* shutdown MB Decoding thread; */
606     if (pbi->b_multithreaded_rd)
607     {
608         int i;
609
610         pbi->b_multithreaded_rd = 0;
611
612         /* allow all threads to exit */
613         for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
614         {
615             sem_post(&pbi->h_event_start_decoding[i]);
616             pthread_join(pbi->h_decoding_thread[i], NULL);
617         }
618
619         for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
620         {
621             sem_destroy(&pbi->h_event_start_decoding[i]);
622         }
623
624         sem_destroy(&pbi->h_event_end_decoding);
625
626             vpx_free(pbi->h_decoding_thread);
627             pbi->h_decoding_thread = NULL;
628
629             vpx_free(pbi->h_event_start_decoding);
630             pbi->h_event_start_decoding = NULL;
631
632             vpx_free(pbi->mb_row_di);
633             pbi->mb_row_di = NULL ;
634
635             vpx_free(pbi->de_thread_data);
636             pbi->de_thread_data = NULL;
637     }
638 }
639
640
641 static void lpf_init( VP8D_COMP *pbi, int default_filt_lvl)
642 {
643     VP8_COMMON *cm  = &pbi->common;
644     MACROBLOCKD *mbd = &pbi->mb;
645     /*YV12_BUFFER_CONFIG *post = &cm->new_frame;*/  /*frame_to_show;*/
646     loop_filter_info *lfi = cm->lf_info;
647     FRAME_TYPE frame_type = cm->frame_type;
648
649     /*int mb_row;
650     int mb_col;
651     int baseline_filter_level[MAX_MB_SEGMENTS];*/
652     int alt_flt_enabled = mbd->segmentation_enabled;
653
654     int i;
655     /*unsigned char *y_ptr, *u_ptr, *v_ptr;*/
656
657     /* Note the baseline filter values for each segment */
658     if (alt_flt_enabled)
659     {
660         for (i = 0; i < MAX_MB_SEGMENTS; i++)
661         {
662             /* Abs value */
663             if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
664                 pbi->mt_baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
665             /* Delta Value */
666             else
667             {
668                 pbi->mt_baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
669                 pbi->mt_baseline_filter_level[i] = (pbi->mt_baseline_filter_level[i] >= 0) ? ((pbi->mt_baseline_filter_level[i] <= MAX_LOOP_FILTER) ? pbi->mt_baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  /* Clamp to valid range */
670             }
671         }
672     }
673     else
674     {
675         for (i = 0; i < MAX_MB_SEGMENTS; i++)
676             pbi->mt_baseline_filter_level[i] = default_filt_lvl;
677     }
678
679     /* Initialize the loop filter for this frame. */
680     if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
681         vp8_init_loop_filter(cm);
682     else if (frame_type != cm->last_frame_type)
683         vp8_frame_init_loop_filter(lfi, frame_type);
684 }
685
686
687 void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
688 {
689     int mb_row;
690     VP8_COMMON *pc = &pbi->common;
691
692     int num_part = 1 << pbi->common.multi_token_partition;
693     int i;
694     volatile int *last_row_current_mb_col = NULL;
695     int nsync = pbi->sync_range;
696
697     int filter_level;
698     loop_filter_info *lfi = pc->lf_info;
699     int alt_flt_enabled = xd->segmentation_enabled;
700     int Segment;
701
702     if(pbi->common.filter_level)
703     {
704         /* Set above_row buffer to 127 for decoding first MB row */
705         vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5);
706         vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
707         vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
708
709         for (i=1; i<pc->mb_rows; i++)
710         {
711             vpx_memset(pbi->mt_yabove_row[i] + VP8BORDERINPIXELS-1, (unsigned char)129, 1);
712             vpx_memset(pbi->mt_uabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
713             vpx_memset(pbi->mt_vabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
714         }
715
716         /* Set left_col to 129 initially */
717         for (i=0; i<pc->mb_rows; i++)
718         {
719             vpx_memset(pbi->mt_yleft_col[i], (unsigned char)129, 16);
720             vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8);
721             vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8);
722         }
723         lpf_init(pbi, pc->filter_level);
724     }
725
726     setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
727
728     for (i = 0; i < pbi->decoding_thread_count; i++)
729         sem_post(&pbi->h_event_start_decoding[i]);
730
731     for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
732     {
733
734         xd->current_bc = &pbi->mbc[mb_row%num_part];
735
736         /* vp8_decode_mb_row(pbi, pc, mb_row, xd); */
737         {
738             int i;
739             int recon_yoffset, recon_uvoffset;
740             int mb_col;
741             int ref_fb_idx = pc->lst_fb_idx;
742             int dst_fb_idx = pc->new_fb_idx;
743             int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
744             int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
745
746            /* volatile int *last_row_current_mb_col = NULL; */
747             if (mb_row > 0)
748                 last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1];
749
750             vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
751             recon_yoffset = mb_row * recon_y_stride * 16;
752             recon_uvoffset = mb_row * recon_uv_stride * 8;
753             /* reset above block coeffs */
754
755             xd->above_context = pc->above_context;
756             xd->up_available = (mb_row != 0);
757
758             xd->mb_to_top_edge = -((mb_row * 16)) << 3;
759             xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
760
761             for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
762             {
763                 if ( mb_row > 0 && (mb_col & (nsync-1)) == 0){
764                     while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
765                     {
766                         x86_pause_hint();
767                         thread_sleep(0);
768                     }
769                 }
770
771                 update_blockd_bmi(xd);
772
773                 /* Distance of Mb to the various image edges.
774                  * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
775                  */
776                 xd->mb_to_left_edge = -((mb_col * 16) << 3);
777                 xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
778
779                 xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
780                 xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
781                 xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
782
783                 xd->left_available = (mb_col != 0);
784
785                 /* Select the appropriate reference frame for this MB */
786                 if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
787                     ref_fb_idx = pc->lst_fb_idx;
788                 else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
789                     ref_fb_idx = pc->gld_fb_idx;
790                 else
791                     ref_fb_idx = pc->alt_fb_idx;
792
793                 xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
794                 xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
795                 xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
796
797                 if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME)
798                 {
799                     /* propagate errors from reference frames */
800                     xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted;
801                 }
802
803                 vp8_build_uvmvs(xd, pc->full_pixel);
804                 decode_macroblock(pbi, xd, mb_row, mb_col);
805
806                 /* check if the boolean decoder has suffered an error */
807                 xd->corrupted |= vp8dx_bool_error(xd->current_bc);
808
809                 if (pbi->common.filter_level)
810                 {
811                     int skip_lf;
812                     /* Save decoded MB last row data for next-row decoding */
813                     if(mb_row != pc->mb_rows-1)
814                     {
815                         vpx_memcpy((pbi->mt_yabove_row[mb_row +1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
816                         vpx_memcpy((pbi->mt_uabove_row[mb_row +1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
817                         vpx_memcpy((pbi->mt_vabove_row[mb_row +1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
818                     }
819
820                     /* save left_col for next MB decoding */
821                     if(mb_col != pc->mb_cols-1)
822                     {
823                         MODE_INFO *next = xd->mode_info_context +1;
824
825                         if (xd->frame_type == KEY_FRAME  ||  next->mbmi.ref_frame == INTRA_FRAME)
826                         {
827                             for (i = 0; i < 16; i++)
828                                 pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15];
829                             for (i = 0; i < 8; i++)
830                             {
831                                 pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7];
832                                 pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7];
833                             }
834                         }
835                     }
836
837                     /* update loopfilter info */
838                     Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
839                     skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
840                                     xd->mode_info_context->mbmi.mode != SPLITMV &&
841                                     xd->mode_info_context->mbmi.mb_skip_coeff);
842                     filter_level = pbi->mt_baseline_filter_level[Segment];
843                     /* Distance of Mb to the various image edges.
844                      * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
845                      * Apply any context driven MB level adjustment
846                      */
847                     filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
848
849                     /* loopfilter on this macroblock. */
850                     if (filter_level)
851                     {
852                         if (mb_col > 0)
853                             pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
854
855                         if (!skip_lf)
856                             pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
857
858                         /* don't apply across umv border */
859                         if (mb_row > 0)
860                             pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
861
862                         if (!skip_lf)
863                             pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
864                     }
865                 }
866
867                 recon_yoffset += 16;
868                 recon_uvoffset += 8;
869
870                 ++xd->mode_info_context;  /* next mb */
871
872                 xd->above_context++;
873
874                 pbi->mt_current_mb_col[mb_row] = mb_col;
875             }
876
877             /* adjust to the next row of mbs */
878             if (pbi->common.filter_level)
879             {
880                 if(mb_row != pc->mb_rows-1)
881                 {
882                     int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
883                     int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1);
884
885                     for (i = 0; i < 4; i++)
886                     {
887                         pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1];
888                         pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1];
889                         pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1];
890                     }
891                 }
892             }else
893                 vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
894
895             ++xd->mode_info_context;      /* skip prediction column */
896         }
897         xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
898     }
899
900     sem_wait(&pbi->h_event_end_decoding);   /* add back for each frame */
901 }