Merge "Copy macroblock data to a buffer before encoding it"
[profile/ivi/libvpx.git] / vp8 / decoder / threading.c
1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10
11
12 #if !defined(WIN32) && CONFIG_OS_SUPPORT == 1
13 # include <unistd.h>
14 #endif
15 #include "onyxd_int.h"
16 #include "vpx_mem/vpx_mem.h"
17 #include "vp8/common/threading.h"
18
19 #include "vp8/common/loopfilter.h"
20 #include "vp8/common/extend.h"
21 #include "vpx_ports/vpx_timer.h"
22 #include "detokenize.h"
23 #include "vp8/common/reconinter.h"
24 #include "reconintra_mt.h"
25 #if CONFIG_ERROR_CONCEALMENT
26 #include "error_concealment.h"
27 #endif
28
29 extern void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);
30 extern void clamp_mvs(MACROBLOCKD *xd);
31 extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
32
33 #if CONFIG_RUNTIME_CPU_DETECT
34 #define RTCD_VTABLE(x) (&(pbi)->common.rtcd.x)
35 #else
36 #define RTCD_VTABLE(x) NULL
37 #endif
38
39 static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
40 {
41     VP8_COMMON *const pc = & pbi->common;
42     int i, j;
43
44     for (i = 0; i < count; i++)
45     {
46         MACROBLOCKD *mbd = &mbrd[i].mbd;
47 #if CONFIG_RUNTIME_CPU_DETECT
48         mbd->rtcd = xd->rtcd;
49 #endif
50         mbd->subpixel_predict        = xd->subpixel_predict;
51         mbd->subpixel_predict8x4     = xd->subpixel_predict8x4;
52         mbd->subpixel_predict8x8     = xd->subpixel_predict8x8;
53         mbd->subpixel_predict16x16   = xd->subpixel_predict16x16;
54
55         mbd->mode_info_context = pc->mi   + pc->mode_info_stride * (i + 1);
56         mbd->mode_info_stride  = pc->mode_info_stride;
57
58         mbd->frame_type = pc->frame_type;
59         mbd->frames_since_golden      = pc->frames_since_golden;
60         mbd->frames_till_alt_ref_frame  = pc->frames_till_alt_ref_frame;
61
62         mbd->pre = pc->yv12_fb[pc->lst_fb_idx];
63         mbd->dst = pc->yv12_fb[pc->new_fb_idx];
64
65         vp8_setup_block_dptrs(mbd);
66         vp8_build_block_doffsets(mbd);
67         mbd->segmentation_enabled    = xd->segmentation_enabled;
68         mbd->mb_segement_abs_delta     = xd->mb_segement_abs_delta;
69         vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
70
71         /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/
72         vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
73         /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/
74         vpx_memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
75         /*unsigned char mode_ref_lf_delta_enabled;
76         unsigned char mode_ref_lf_delta_update;*/
77         mbd->mode_ref_lf_delta_enabled    = xd->mode_ref_lf_delta_enabled;
78         mbd->mode_ref_lf_delta_update    = xd->mode_ref_lf_delta_update;
79
80         mbd->current_bc = &pbi->bc2;
81
82         for (j = 0; j < 25; j++)
83         {
84             mbd->block[j].dequant = xd->block[j].dequant;
85         }
86     }
87
88     for (i=0; i< pc->mb_rows; i++)
89         pbi->mt_current_mb_col[i]=-1;
90 }
91
92
93 static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col)
94 {
95     int eobtotal = 0;
96     int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs;
97
98     if (xd->mode_info_context->mbmi.mb_skip_coeff)
99     {
100         vp8_reset_mb_tokens_context(xd);
101     }
102     else
103     {
104         eobtotal = vp8_decode_mb_tokens(pbi, xd);
105     }
106
107     /* Perform temporary clamping of the MV to be used for prediction */
108     if (do_clamp)
109     {
110         clamp_mvs(xd);
111     }
112
113     eobtotal |= (xd->mode_info_context->mbmi.mode == B_PRED ||
114                   xd->mode_info_context->mbmi.mode == SPLITMV);
115     if (!eobtotal)
116     {
117         /* Special case:  Force the loopfilter to skip when eobtotal and
118          * mb_skip_coeff are zero.
119          * */
120         xd->mode_info_context->mbmi.mb_skip_coeff = 1;
121
122         /*mt_skip_recon_mb(pbi, xd, mb_row, mb_col);*/
123         if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
124         {
125             vp8mt_build_intra_predictors_mbuv_s(pbi, xd, mb_row, mb_col);
126             vp8mt_build_intra_predictors_mby_s(pbi, xd, mb_row, mb_col);
127         }
128         else
129         {
130             vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer,
131                                                xd->dst.u_buffer, xd->dst.v_buffer,
132                                                xd->dst.y_stride, xd->dst.uv_stride);
133         }
134         return;
135     }
136
137     if (xd->segmentation_enabled)
138         mb_init_dequantizer(pbi, xd);
139
140     /* do prediction */
141     if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
142     {
143         vp8mt_build_intra_predictors_mbuv(pbi, xd, mb_row, mb_col);
144
145         if (xd->mode_info_context->mbmi.mode != B_PRED)
146         {
147             vp8mt_build_intra_predictors_mby(pbi, xd, mb_row, mb_col);
148         } else {
149             vp8mt_intra_prediction_down_copy(pbi, xd, mb_row, mb_col);
150         }
151     }
152     else
153     {
154         vp8_build_inter_predictors_mb(xd);
155     }
156
157 #if CONFIG_ERROR_CONCEALMENT
158     if (pbi->ec_enabled &&
159         (mb_row * pbi->common.mb_cols + mb_col >= pbi->mvs_corrupt_from_mb ||
160         vp8dx_bool_error(xd->current_bc)))
161     {
162         /* MB with corrupt residuals or corrupt mode/motion vectors.
163          * Better to use the predictor as reconstruction.
164          */
165         vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
166         vp8_conceal_corrupt_mb(xd);
167         return;
168     }
169 #endif
170
171     /* dequantization and idct */
172     if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
173     {
174         BLOCKD *b = &xd->block[24];
175         DEQUANT_INVOKE(&pbi->dequant, block)(b);
176
177         /* do 2nd order transform on the dc block */
178         if (xd->eobs[24] > 1)
179         {
180             IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
181             ((int *)b->qcoeff)[0] = 0;
182             ((int *)b->qcoeff)[1] = 0;
183             ((int *)b->qcoeff)[2] = 0;
184             ((int *)b->qcoeff)[3] = 0;
185             ((int *)b->qcoeff)[4] = 0;
186             ((int *)b->qcoeff)[5] = 0;
187             ((int *)b->qcoeff)[6] = 0;
188             ((int *)b->qcoeff)[7] = 0;
189         }
190         else
191         {
192             IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff);
193             ((int *)b->qcoeff)[0] = 0;
194         }
195
196         DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block)
197                         (xd->qcoeff, xd->block[0].dequant,
198                          xd->predictor, xd->dst.y_buffer,
199                          xd->dst.y_stride, xd->eobs, xd->block[24].diff);
200     }
201     else if (xd->mode_info_context->mbmi.mode == B_PRED)
202     {
203         for (i = 0; i < 16; i++)
204         {
205             BLOCKD *b = &xd->block[i];
206
207             vp8mt_predict_intra4x4(pbi, xd, b->bmi.as_mode, b->predictor, mb_row, mb_col, i);
208
209             if (xd->eobs[i] > 1)
210             {
211                 DEQUANT_INVOKE(&pbi->dequant, idct_add)
212                     (b->qcoeff, b->dequant,  b->predictor,
213                     *(b->base_dst) + b->dst, 16, b->dst_stride);
214             }
215             else
216             {
217                 IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
218                     (b->qcoeff[0] * b->dequant[0], b->predictor,
219                     *(b->base_dst) + b->dst, 16, b->dst_stride);
220                 ((int *)b->qcoeff)[0] = 0;
221             }
222         }
223     }
224     else
225     {
226         DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
227                         (xd->qcoeff, xd->block[0].dequant,
228                          xd->predictor, xd->dst.y_buffer,
229                          xd->dst.y_stride, xd->eobs);
230     }
231
232     DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
233                     (xd->qcoeff+16*16, xd->block[16].dequant,
234                      xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer,
235                      xd->dst.uv_stride, xd->eobs+16);
236 }
237
238
239 static THREAD_FUNCTION thread_decoding_proc(void *p_data)
240 {
241     int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
242     VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
243     MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
244     ENTROPY_CONTEXT_PLANES mb_row_left_context;
245
246     while (1)
247     {
248         if (pbi->b_multithreaded_rd == 0)
249             break;
250
251         /*if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)*/
252         if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0)
253         {
254             if (pbi->b_multithreaded_rd == 0)
255                 break;
256             else
257             {
258                 VP8_COMMON *pc = &pbi->common;
259                 MACROBLOCKD *xd = &mbrd->mbd;
260
261                 int mb_row;
262                 int num_part = 1 << pbi->common.multi_token_partition;
263                 volatile int *last_row_current_mb_col;
264                 int nsync = pbi->sync_range;
265
266                 for (mb_row = ithread+1; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
267                 {
268                     int i;
269                     int recon_yoffset, recon_uvoffset;
270                     int mb_col;
271                     int ref_fb_idx = pc->lst_fb_idx;
272                     int dst_fb_idx = pc->new_fb_idx;
273                     int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
274                     int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
275
276                     int filter_level;
277                     loop_filter_info *lfi = pc->lf_info;
278                     int alt_flt_enabled = xd->segmentation_enabled;
279                     int Segment;
280
281                     pbi->mb_row_di[ithread].mb_row = mb_row;
282                     pbi->mb_row_di[ithread].mbd.current_bc =  &pbi->mbc[mb_row%num_part];
283
284                     last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1];
285
286                     recon_yoffset = mb_row * recon_y_stride * 16;
287                     recon_uvoffset = mb_row * recon_uv_stride * 8;
288                     /* reset above block coeffs */
289
290                     xd->above_context = pc->above_context;
291                     xd->left_context = &mb_row_left_context;
292                     vpx_memset(&mb_row_left_context, 0, sizeof(mb_row_left_context));
293                     xd->up_available = (mb_row != 0);
294
295                     xd->mb_to_top_edge = -((mb_row * 16)) << 3;
296                     xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
297
298                     for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
299                     {
300                         if ((mb_col & (nsync-1)) == 0)
301                         {
302                             while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
303                             {
304                                 x86_pause_hint();
305                                 thread_sleep(0);
306                             }
307                         }
308
309                         update_blockd_bmi(xd);
310
311                         /* Distance of MB to the various image edges.
312                          * These are specified to 8th pel as they are always
313                          * compared to values that are in 1/8th pel units.
314                          */
315                         xd->mb_to_left_edge = -((mb_col * 16) << 3);
316                         xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
317
318 #if CONFIG_ERROR_CONCEALMENT
319                         if (pbi->ec_enabled &&
320                             (xd->mode_info_context->mbmi.ref_frame ==
321                                                                  INTRA_FRAME) &&
322                             vp8dx_bool_error(xd->current_bc))
323                         {
324                             /* We have an intra block with corrupt coefficients,
325                              * better to conceal with an inter block.
326                              * Interpolate MVs from neighboring MBs
327                              *
328                              * Note that for the first mb with corrupt residual
329                              * in a frame, we might not discover that before
330                              * decoding the residual. That happens after this
331                              * check, and therefore no inter concealment will be
332                              * done.
333                              */
334                             vp8_interpolate_motion(xd,
335                                                    mb_row, mb_col,
336                                                    pc->mb_rows, pc->mb_cols,
337                                                    pc->mode_info_stride);
338                         }
339 #endif
340
341
342                         xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
343                         xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
344                         xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
345
346                         xd->left_available = (mb_col != 0);
347
348                         /* Select the appropriate reference frame for this MB */
349                         if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
350                             ref_fb_idx = pc->lst_fb_idx;
351                         else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
352                             ref_fb_idx = pc->gld_fb_idx;
353                         else
354                             ref_fb_idx = pc->alt_fb_idx;
355
356                         xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
357                         xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
358                         xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
359
360                         vp8_build_uvmvs(xd, pc->full_pixel);
361                         decode_macroblock(pbi, xd, mb_row, mb_col);
362
363                         if (pbi->common.filter_level)
364                         {
365                             int skip_lf;
366                             if( mb_row != pc->mb_rows-1 )
367                             {
368                                 /* Save decoded MB last row data for next-row decoding */
369                                 vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
370                                 vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
371                                 vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
372                             }
373
374                             /* save left_col for next MB decoding */
375                             if(mb_col != pc->mb_cols-1)
376                             {
377                                 MODE_INFO *next = xd->mode_info_context +1;
378
379                                 if (next->mbmi.ref_frame == INTRA_FRAME)
380                                 {
381                                     for (i = 0; i < 16; i++)
382                                         pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15];
383                                     for (i = 0; i < 8; i++)
384                                     {
385                                         pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7];
386                                         pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7];
387                                     }
388                                 }
389                             }
390
391                             /* update loopfilter info */
392                             Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
393                             skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
394                                             xd->mode_info_context->mbmi.mode != SPLITMV &&
395                                             xd->mode_info_context->mbmi.mb_skip_coeff);
396
397                             filter_level = pbi->mt_baseline_filter_level[Segment];
398                             /* Distance of Mb to the various image edges.
399                              * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
400                              * Apply any context driven MB level adjustment
401                              */
402                             filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
403
404                             /* loopfilter on this macroblock. */
405                             if (filter_level)
406                             {
407                                 if (mb_col > 0)
408                                     pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
409
410                                 if (!skip_lf)
411                                     pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
412
413                                 /* don't apply across umv border */
414                                 if (mb_row > 0)
415                                     pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
416
417                                 if (!skip_lf)
418                                     pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
419                             }
420                         }
421
422                         recon_yoffset += 16;
423                         recon_uvoffset += 8;
424
425                         ++xd->mode_info_context;  /* next mb */
426
427                         xd->above_context++;
428
429                         /*pbi->mb_row_di[ithread].current_mb_col = mb_col;*/
430                         pbi->mt_current_mb_col[mb_row] = mb_col;
431                     }
432
433                     /* adjust to the next row of mbs */
434                     if (pbi->common.filter_level)
435                     {
436                         if(mb_row != pc->mb_rows-1)
437                         {
438                             int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
439                             int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1);
440
441                             for (i = 0; i < 4; i++)
442                             {
443                                 pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1];
444                                 pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1];
445                                 pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1];
446                             }
447                         }
448                     } else
449                         vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
450
451                     ++xd->mode_info_context;      /* skip prediction column */
452
453                     /* since we have multithread */
454                     xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
455                 }
456             }
457         }
458         /*  add this to each frame */
459         if ((mbrd->mb_row == pbi->common.mb_rows-1) || ((mbrd->mb_row == pbi->common.mb_rows-2) && (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1))
460         {
461             /*SetEvent(pbi->h_event_end_decoding);*/
462             sem_post(&pbi->h_event_end_decoding);
463         }
464     }
465
466     return 0 ;
467 }
468
469
470 void vp8_decoder_create_threads(VP8D_COMP *pbi)
471 {
472     int core_count = 0;
473     int ithread;
474
475     pbi->b_multithreaded_rd = 0;
476     pbi->allocated_decoding_thread_count = 0;
477
478     /* limit decoding threads to the max number of token partitions */
479     core_count = (pbi->max_threads > 8) ? 8 : pbi->max_threads;
480
481     /* limit decoding threads to the available cores */
482     if (core_count > pbi->common.processor_core_count)
483         core_count = pbi->common.processor_core_count;
484
485     if (core_count > 1)
486     {
487         pbi->b_multithreaded_rd = 1;
488         pbi->decoding_thread_count = core_count - 1;
489
490         CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
491         CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
492         CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count));
493         vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count);
494         CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count));
495
496         for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)
497         {
498             sem_init(&pbi->h_event_start_decoding[ithread], 0, 0);
499
500             pbi->de_thread_data[ithread].ithread  = ithread;
501             pbi->de_thread_data[ithread].ptr1     = (void *)pbi;
502             pbi->de_thread_data[ithread].ptr2     = (void *) &pbi->mb_row_di[ithread];
503
504             pthread_create(&pbi->h_decoding_thread[ithread], 0, thread_decoding_proc, (&pbi->de_thread_data[ithread]));
505         }
506
507         sem_init(&pbi->h_event_end_decoding, 0, 0);
508
509         pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
510     }
511 }
512
513
514 void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
515 {
516     int i;
517
518     if (pbi->b_multithreaded_rd)
519     {
520             vpx_free(pbi->mt_current_mb_col);
521             pbi->mt_current_mb_col = NULL ;
522
523         /* Free above_row buffers. */
524         if (pbi->mt_yabove_row)
525         {
526             for (i=0; i< mb_rows; i++)
527             {
528                     vpx_free(pbi->mt_yabove_row[i]);
529                     pbi->mt_yabove_row[i] = NULL ;
530             }
531             vpx_free(pbi->mt_yabove_row);
532             pbi->mt_yabove_row = NULL ;
533         }
534
535         if (pbi->mt_uabove_row)
536         {
537             for (i=0; i< mb_rows; i++)
538             {
539                     vpx_free(pbi->mt_uabove_row[i]);
540                     pbi->mt_uabove_row[i] = NULL ;
541             }
542             vpx_free(pbi->mt_uabove_row);
543             pbi->mt_uabove_row = NULL ;
544         }
545
546         if (pbi->mt_vabove_row)
547         {
548             for (i=0; i< mb_rows; i++)
549             {
550                     vpx_free(pbi->mt_vabove_row[i]);
551                     pbi->mt_vabove_row[i] = NULL ;
552             }
553             vpx_free(pbi->mt_vabove_row);
554             pbi->mt_vabove_row = NULL ;
555         }
556
557         /* Free left_col buffers. */
558         if (pbi->mt_yleft_col)
559         {
560             for (i=0; i< mb_rows; i++)
561             {
562                     vpx_free(pbi->mt_yleft_col[i]);
563                     pbi->mt_yleft_col[i] = NULL ;
564             }
565             vpx_free(pbi->mt_yleft_col);
566             pbi->mt_yleft_col = NULL ;
567         }
568
569         if (pbi->mt_uleft_col)
570         {
571             for (i=0; i< mb_rows; i++)
572             {
573                     vpx_free(pbi->mt_uleft_col[i]);
574                     pbi->mt_uleft_col[i] = NULL ;
575             }
576             vpx_free(pbi->mt_uleft_col);
577             pbi->mt_uleft_col = NULL ;
578         }
579
580         if (pbi->mt_vleft_col)
581         {
582             for (i=0; i< mb_rows; i++)
583             {
584                     vpx_free(pbi->mt_vleft_col[i]);
585                     pbi->mt_vleft_col[i] = NULL ;
586             }
587             vpx_free(pbi->mt_vleft_col);
588             pbi->mt_vleft_col = NULL ;
589         }
590     }
591 }
592
593
594 void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
595 {
596     VP8_COMMON *const pc = & pbi->common;
597     int i;
598     int uv_width;
599
600     if (pbi->b_multithreaded_rd)
601     {
602         vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);
603
604         /* our internal buffers are always multiples of 16 */
605         if ((width & 0xf) != 0)
606             width += 16 - (width & 0xf);
607
608         if (width < 640) pbi->sync_range = 1;
609         else if (width <= 1280) pbi->sync_range = 8;
610         else if (width <= 2560) pbi->sync_range =16;
611         else pbi->sync_range = 32;
612
613         uv_width = width >>1;
614
615         /* Allocate an int for each mb row. */
616         CHECK_MEM_ERROR(pbi->mt_current_mb_col, vpx_malloc(sizeof(int) * pc->mb_rows));
617
618         /* Allocate memory for above_row buffers. */
619         CHECK_MEM_ERROR(pbi->mt_yabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
620         for (i=0; i< pc->mb_rows; i++)
621             CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_calloc(sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1)), 1));
622
623         CHECK_MEM_ERROR(pbi->mt_uabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
624         for (i=0; i< pc->mb_rows; i++)
625             CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));
626
627         CHECK_MEM_ERROR(pbi->mt_vabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
628         for (i=0; i< pc->mb_rows; i++)
629             CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));
630
631         /* Allocate memory for left_col buffers. */
632         CHECK_MEM_ERROR(pbi->mt_yleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
633         for (i=0; i< pc->mb_rows; i++)
634             CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1));
635
636         CHECK_MEM_ERROR(pbi->mt_uleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
637         for (i=0; i< pc->mb_rows; i++)
638             CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
639
640         CHECK_MEM_ERROR(pbi->mt_vleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
641         for (i=0; i< pc->mb_rows; i++)
642             CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
643     }
644 }
645
646
647 void vp8_decoder_remove_threads(VP8D_COMP *pbi)
648 {
649     /* shutdown MB Decoding thread; */
650     if (pbi->b_multithreaded_rd)
651     {
652         int i;
653
654         pbi->b_multithreaded_rd = 0;
655
656         /* allow all threads to exit */
657         for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
658         {
659             sem_post(&pbi->h_event_start_decoding[i]);
660             pthread_join(pbi->h_decoding_thread[i], NULL);
661         }
662
663         for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
664         {
665             sem_destroy(&pbi->h_event_start_decoding[i]);
666         }
667
668         sem_destroy(&pbi->h_event_end_decoding);
669
670             vpx_free(pbi->h_decoding_thread);
671             pbi->h_decoding_thread = NULL;
672
673             vpx_free(pbi->h_event_start_decoding);
674             pbi->h_event_start_decoding = NULL;
675
676             vpx_free(pbi->mb_row_di);
677             pbi->mb_row_di = NULL ;
678
679             vpx_free(pbi->de_thread_data);
680             pbi->de_thread_data = NULL;
681     }
682 }
683
684
685 static void lpf_init( VP8D_COMP *pbi, int default_filt_lvl)
686 {
687     VP8_COMMON *cm  = &pbi->common;
688     MACROBLOCKD *mbd = &pbi->mb;
689     /*YV12_BUFFER_CONFIG *post = &cm->new_frame;*/  /*frame_to_show;*/
690     loop_filter_info *lfi = cm->lf_info;
691     FRAME_TYPE frame_type = cm->frame_type;
692
693     /*int mb_row;
694     int mb_col;
695     int baseline_filter_level[MAX_MB_SEGMENTS];*/
696     int alt_flt_enabled = mbd->segmentation_enabled;
697
698     int i;
699     /*unsigned char *y_ptr, *u_ptr, *v_ptr;*/
700
701     /* Note the baseline filter values for each segment */
702     if (alt_flt_enabled)
703     {
704         for (i = 0; i < MAX_MB_SEGMENTS; i++)
705         {
706             /* Abs value */
707             if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
708                 pbi->mt_baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
709             /* Delta Value */
710             else
711             {
712                 pbi->mt_baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
713                 pbi->mt_baseline_filter_level[i] = (pbi->mt_baseline_filter_level[i] >= 0) ? ((pbi->mt_baseline_filter_level[i] <= MAX_LOOP_FILTER) ? pbi->mt_baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  /* Clamp to valid range */
714             }
715         }
716     }
717     else
718     {
719         for (i = 0; i < MAX_MB_SEGMENTS; i++)
720             pbi->mt_baseline_filter_level[i] = default_filt_lvl;
721     }
722
723     /* Initialize the loop filter for this frame. */
724     if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
725         vp8_init_loop_filter(cm);
726     else if (frame_type != cm->last_frame_type)
727         vp8_frame_init_loop_filter(lfi, frame_type);
728 }
729
730
731 void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
732 {
733     int mb_row;
734     VP8_COMMON *pc = &pbi->common;
735
736     int num_part = 1 << pbi->common.multi_token_partition;
737     int i;
738     volatile int *last_row_current_mb_col = NULL;
739     int nsync = pbi->sync_range;
740
741     int filter_level;
742     loop_filter_info *lfi = pc->lf_info;
743     int alt_flt_enabled = xd->segmentation_enabled;
744     int Segment;
745
746     if(pbi->common.filter_level)
747     {
748         /* Set above_row buffer to 127 for decoding first MB row */
749         vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5);
750         vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
751         vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
752
753         for (i=1; i<pc->mb_rows; i++)
754         {
755             vpx_memset(pbi->mt_yabove_row[i] + VP8BORDERINPIXELS-1, (unsigned char)129, 1);
756             vpx_memset(pbi->mt_uabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
757             vpx_memset(pbi->mt_vabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
758         }
759
760         /* Set left_col to 129 initially */
761         for (i=0; i<pc->mb_rows; i++)
762         {
763             vpx_memset(pbi->mt_yleft_col[i], (unsigned char)129, 16);
764             vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8);
765             vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8);
766         }
767         lpf_init(pbi, pc->filter_level);
768     }
769
770     setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
771
772     for (i = 0; i < pbi->decoding_thread_count; i++)
773         sem_post(&pbi->h_event_start_decoding[i]);
774
775     for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
776     {
777
778         xd->current_bc = &pbi->mbc[mb_row%num_part];
779
780         /* vp8_decode_mb_row(pbi, pc, mb_row, xd); */
781         {
782             int i;
783             int recon_yoffset, recon_uvoffset;
784             int mb_col;
785             int ref_fb_idx = pc->lst_fb_idx;
786             int dst_fb_idx = pc->new_fb_idx;
787             int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
788             int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
789
790            /* volatile int *last_row_current_mb_col = NULL; */
791             if (mb_row > 0)
792                 last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1];
793
794             vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
795             recon_yoffset = mb_row * recon_y_stride * 16;
796             recon_uvoffset = mb_row * recon_uv_stride * 8;
797             /* reset above block coeffs */
798
799             xd->above_context = pc->above_context;
800             xd->up_available = (mb_row != 0);
801
802             xd->mb_to_top_edge = -((mb_row * 16)) << 3;
803             xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
804
805             for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
806             {
807                 if ( mb_row > 0 && (mb_col & (nsync-1)) == 0){
808                     while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
809                     {
810                         x86_pause_hint();
811                         thread_sleep(0);
812                     }
813                 }
814
815                 update_blockd_bmi(xd);
816
817                 /* Distance of MB to the various image edges.
818                  * These are specified to 8th pel as they are always compared to
819                  * values that are in 1/8th pel units.
820                  */
821                 xd->mb_to_left_edge = -((mb_col * 16) << 3);
822                 xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
823
824 #if CONFIG_ERROR_CONCEALMENT
825                 if (pbi->ec_enabled &&
826                     (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
827                     vp8dx_bool_error(xd->current_bc))
828                 {
829                     /* We have an intra block with corrupt coefficients, better
830                      * to conceal with an inter block. Interpolate MVs from
831                      * neighboring MBs
832                      *
833                      * Note that for the first mb with corrupt residual in a
834                      * frame, we might not discover that before decoding the
835                      * residual. That happens after this check, and therefore no
836                      * inter concealment will be done.
837                      */
838                     vp8_interpolate_motion(xd,
839                                            mb_row, mb_col,
840                                            pc->mb_rows, pc->mb_cols,
841                                            pc->mode_info_stride);
842                 }
843 #endif
844
845
846                 xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
847                 xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
848                 xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
849
850                 xd->left_available = (mb_col != 0);
851
852                 /* Select the appropriate reference frame for this MB */
853                 if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
854                     ref_fb_idx = pc->lst_fb_idx;
855                 else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
856                     ref_fb_idx = pc->gld_fb_idx;
857                 else
858                     ref_fb_idx = pc->alt_fb_idx;
859
860                 xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
861                 xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
862                 xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
863
864                 if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME)
865                 {
866                     /* propagate errors from reference frames */
867                     xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted;
868                 }
869
870                 vp8_build_uvmvs(xd, pc->full_pixel);
871                 decode_macroblock(pbi, xd, mb_row, mb_col);
872
873                 /* check if the boolean decoder has suffered an error */
874                 xd->corrupted |= vp8dx_bool_error(xd->current_bc);
875
876                 if (pbi->common.filter_level)
877                 {
878                     int skip_lf;
879                     /* Save decoded MB last row data for next-row decoding */
880                     if(mb_row != pc->mb_rows-1)
881                     {
882                         vpx_memcpy((pbi->mt_yabove_row[mb_row +1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
883                         vpx_memcpy((pbi->mt_uabove_row[mb_row +1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
884                         vpx_memcpy((pbi->mt_vabove_row[mb_row +1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
885                     }
886
887                     /* save left_col for next MB decoding */
888                     if(mb_col != pc->mb_cols-1)
889                     {
890                         MODE_INFO *next = xd->mode_info_context +1;
891
892                         if (next->mbmi.ref_frame == INTRA_FRAME)
893                         {
894                             for (i = 0; i < 16; i++)
895                                 pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15];
896                             for (i = 0; i < 8; i++)
897                             {
898                                 pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7];
899                                 pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7];
900                             }
901                         }
902                     }
903
904                     /* update loopfilter info */
905                     Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
906                     skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
907                                     xd->mode_info_context->mbmi.mode != SPLITMV &&
908                                     xd->mode_info_context->mbmi.mb_skip_coeff);
909                     filter_level = pbi->mt_baseline_filter_level[Segment];
910                     /* Distance of Mb to the various image edges.
911                      * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
912                      * Apply any context driven MB level adjustment
913                      */
914                     filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
915
916                     /* loopfilter on this macroblock. */
917                     if (filter_level)
918                     {
919                         if (mb_col > 0)
920                             pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
921
922                         if (!skip_lf)
923                             pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
924
925                         /* don't apply across umv border */
926                         if (mb_row > 0)
927                             pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
928
929                         if (!skip_lf)
930                             pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
931                     }
932                 }
933
934                 recon_yoffset += 16;
935                 recon_uvoffset += 8;
936
937                 ++xd->mode_info_context;  /* next mb */
938
939                 xd->above_context++;
940
941                 pbi->mt_current_mb_col[mb_row] = mb_col;
942             }
943
944             /* adjust to the next row of mbs */
945             if (pbi->common.filter_level)
946             {
947                 if(mb_row != pc->mb_rows-1)
948                 {
949                     int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
950                     int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1);
951
952                     for (i = 0; i < 4; i++)
953                     {
954                         pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1];
955                         pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1];
956                         pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1];
957                     }
958                 }
959             }else
960                 vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
961
962             ++xd->mode_info_context;      /* skip prediction column */
963         }
964         xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
965     }
966
967     sem_wait(&pbi->h_event_end_decoding);   /* add back for each frame */
968 }