fix sharpness bug and clean up
vp8/decoder/threading.c
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#if !defined(WIN32) && CONFIG_OS_SUPPORT == 1
# include <unistd.h>
#endif
#include "onyxd_int.h"
#include "vpx_mem/vpx_mem.h"
#include "vp8/common/threading.h"

#include "vp8/common/loopfilter.h"
#include "vp8/common/extend.h"
#include "vpx_ports/vpx_timer.h"
#include "detokenize.h"
#include "vp8/common/reconinter.h"
#include "reconintra_mt.h"
#if CONFIG_ERROR_CONCEALMENT
#include "error_concealment.h"
#endif

extern void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);
extern void clamp_mvs(MACROBLOCKD *xd);
extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);

#if CONFIG_RUNTIME_CPU_DETECT
#define RTCD_VTABLE(x) (&(pbi)->common.rtcd.x)
#else
#define RTCD_VTABLE(x) NULL
#endif

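/* Copy the decode-critical parts of the main thread's MACROBLOCKD into each
 * worker's per-row context, and reset the per-row progress counters.
 * mt_current_mb_col[row] is the last column decoded in that row; -1 means
 * the row has not started, and the row below spins on it to stay at least
 * sync_range columns behind (see thread_decoding_proc).
 */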
static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
{
    VP8_COMMON *const pc = &pbi->common;
    int i, j;

    for (i = 0; i < count; i++)
    {
        MACROBLOCKD *mbd = &mbrd[i].mbd;
#if CONFIG_RUNTIME_CPU_DETECT
        mbd->rtcd = xd->rtcd;
#endif
        mbd->subpixel_predict        = xd->subpixel_predict;
        mbd->subpixel_predict8x4     = xd->subpixel_predict8x4;
        mbd->subpixel_predict8x8     = xd->subpixel_predict8x8;
        mbd->subpixel_predict16x16   = xd->subpixel_predict16x16;

        mbd->mode_info_context = pc->mi + pc->mode_info_stride * (i + 1);
        mbd->mode_info_stride  = pc->mode_info_stride;

        mbd->frame_type = pc->frame_type;
        mbd->frames_since_golden       = pc->frames_since_golden;
        mbd->frames_till_alt_ref_frame = pc->frames_till_alt_ref_frame;

        mbd->pre = pc->yv12_fb[pc->lst_fb_idx];
        mbd->dst = pc->yv12_fb[pc->new_fb_idx];

        vp8_setup_block_dptrs(mbd);
        vp8_build_block_doffsets(mbd);
        mbd->segmentation_enabled  = xd->segmentation_enabled;
        mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
        vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));

        /* loop filter delta state */
        vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
        vpx_memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
        mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled;
        mbd->mode_ref_lf_delta_update  = xd->mode_ref_lf_delta_update;

        mbd->current_bc = &pbi->bc2;

        for (j = 0; j < 25; j++)
        {
            mbd->block[j].dequant = xd->block[j].dequant;
        }
    }

    for (i = 0; i < pc->mb_rows; i++)
        pbi->mt_current_mb_col[i] = -1;
}


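/* Decode a single macroblock: detokenize the residual (unless the skip flag
 * says there is none), clamp the motion vectors used for prediction, build
 * the intra or inter prediction, then dequantize and inverse-transform the
 * residual into the destination frame.
 */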
static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col)
{
    int eobtotal = 0;
    int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs;

    if (xd->mode_info_context->mbmi.mb_skip_coeff)
    {
        vp8_reset_mb_tokens_context(xd);
    }
    else
    {
        eobtotal = vp8_decode_mb_tokens(pbi, xd);
    }

    /* Perform temporary clamping of the MV to be used for prediction */
    if (do_clamp)
    {
        clamp_mvs(xd);
    }

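    /* B_PRED and SPLITMV predict per 4x4 sub-block, so they must always take
     * the full reconstruction path below even when every EOB is zero; OR-ing
     * the mode test into eobtotal keeps them out of the whole-MB skip case.
     */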
    eobtotal |= (xd->mode_info_context->mbmi.mode == B_PRED ||
                 xd->mode_info_context->mbmi.mode == SPLITMV);
    if (!eobtotal)
    {
        /* Special case: Force the loopfilter to skip when eobtotal and
         * mb_skip_coeff are zero.
         */
        xd->mode_info_context->mbmi.mb_skip_coeff = 1;

        /*mt_skip_recon_mb(pbi, xd, mb_row, mb_col);*/
        if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
        {
            vp8mt_build_intra_predictors_mbuv_s(pbi, xd, mb_row, mb_col);
            vp8mt_build_intra_predictors_mby_s(pbi, xd, mb_row, mb_col);
        }
        else
        {
            vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer,
                                               xd->dst.u_buffer, xd->dst.v_buffer,
                                               xd->dst.y_stride, xd->dst.uv_stride);
        }
        return;
    }

    if (xd->segmentation_enabled)
        mb_init_dequantizer(pbi, xd);

    /* do prediction */
    if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    {
        vp8mt_build_intra_predictors_mbuv(pbi, xd, mb_row, mb_col);

        if (xd->mode_info_context->mbmi.mode != B_PRED)
        {
            vp8mt_build_intra_predictors_mby(pbi, xd, mb_row, mb_col);
        }
        else
        {
            vp8mt_intra_prediction_down_copy(pbi, xd, mb_row, mb_col);
        }
    }
    else
    {
        vp8_build_inter_predictors_mb(xd);
    }

#if CONFIG_ERROR_CONCEALMENT
    if (pbi->ec_enabled &&
        (mb_row * pbi->common.mb_cols + mb_col >= pbi->mvs_corrupt_from_mb ||
         vp8dx_bool_error(xd->current_bc)))
    {
        /* MB with corrupt residuals or corrupt mode/motion vectors.
         * Better to use the predictor as reconstruction.
         */
        vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
        vp8_conceal_corrupt_mb(xd);
        return;
    }
#endif

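    /* For every mode except B_PRED and SPLITMV, the luma DC coefficients are
     * coded as a separate second-order block (index 24). Invert that
     * Walsh-Hadamard transform first; dc_idct_add_y_block() then reads the
     * reconstructed DC values from block[24].diff while adding the per-block
     * IDCT results.
     */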
    /* dequantization and idct */
    if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
    {
        BLOCKD *b = &xd->block[24];
        DEQUANT_INVOKE(&pbi->dequant, block)(b);

        /* do 2nd order transform on the dc block */
        if (xd->eobs[24] > 1)
        {
            IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
            ((int *)b->qcoeff)[0] = 0;
            ((int *)b->qcoeff)[1] = 0;
            ((int *)b->qcoeff)[2] = 0;
            ((int *)b->qcoeff)[3] = 0;
            ((int *)b->qcoeff)[4] = 0;
            ((int *)b->qcoeff)[5] = 0;
            ((int *)b->qcoeff)[6] = 0;
            ((int *)b->qcoeff)[7] = 0;
        }
        else
        {
            IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff);
            ((int *)b->qcoeff)[0] = 0;
        }

        DEQUANT_INVOKE(&pbi->dequant, dc_idct_add_y_block)
                       (xd->qcoeff, xd->block[0].dequant,
                        xd->predictor, xd->dst.y_buffer,
                        xd->dst.y_stride, xd->eobs, xd->block[24].diff);
    }
    else if (xd->mode_info_context->mbmi.mode == B_PRED)
    {
        for (i = 0; i < 16; i++)
        {
            BLOCKD *b = &xd->block[i];

            vp8mt_predict_intra4x4(pbi, xd, b->bmi.as_mode, b->predictor, mb_row, mb_col, i);

            if (xd->eobs[i] > 1)
            {
                DEQUANT_INVOKE(&pbi->dequant, idct_add)
                    (b->qcoeff, b->dequant, b->predictor,
                     *(b->base_dst) + b->dst, 16, b->dst_stride);
            }
            else
            {
                IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
                    (b->qcoeff[0] * b->dequant[0], b->predictor,
                     *(b->base_dst) + b->dst, 16, b->dst_stride);
                ((int *)b->qcoeff)[0] = 0;
            }
        }
    }
    else
    {
        DEQUANT_INVOKE(&pbi->dequant, idct_add_y_block)
                       (xd->qcoeff, xd->block[0].dequant,
                        xd->predictor, xd->dst.y_buffer,
                        xd->dst.y_stride, xd->eobs);
    }

    DEQUANT_INVOKE(&pbi->dequant, idct_add_uv_block)
                   (xd->qcoeff + 16 * 16, xd->block[16].dequant,
                    xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,
                    xd->dst.uv_stride, xd->eobs + 16);
}


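/* Worker thread entry point. With a row stride of (decoding_thread_count + 1),
 * worker i decodes macroblock rows i+1, i+1+stride, i+1+2*stride, ... while
 * the main thread (vp8mt_decode_mb_rows) takes rows 0, stride, 2*stride, ...
 * Each row trails the row above it by at least sync_range columns, so the
 * above-row pixels and mode info a macroblock reads are already decoded.
 */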
static THREAD_FUNCTION thread_decoding_proc(void *p_data)
{
    int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
    VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
    MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
    ENTROPY_CONTEXT_PLANES mb_row_left_context;

    while (1)
    {
        if (pbi->b_multithreaded_rd == 0)
            break;

        /*if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)*/
        if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0)
        {
            if (pbi->b_multithreaded_rd == 0)
                break;
            else
            {
                VP8_COMMON *pc = &pbi->common;
                MACROBLOCKD *xd = &mbrd->mbd;

                int mb_row;
                int num_part = 1 << pbi->common.multi_token_partition;
                volatile int *last_row_current_mb_col;
                int nsync = pbi->sync_range;

                for (mb_row = ithread + 1; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
                {
                    int i;
                    int recon_yoffset, recon_uvoffset;
                    int mb_col;
                    int ref_fb_idx = pc->lst_fb_idx;
                    int dst_fb_idx = pc->new_fb_idx;
                    int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
                    int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;

                    int filter_level;
                    loop_filter_info_n *lfi_n = &pc->lf_info;

                    pbi->mb_row_di[ithread].mb_row = mb_row;
                    pbi->mb_row_di[ithread].mbd.current_bc = &pbi->mbc[mb_row % num_part];

                    last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row - 1];

                    recon_yoffset = mb_row * recon_y_stride * 16;
                    recon_uvoffset = mb_row * recon_uv_stride * 8;
                    /* reset above block coeffs */

                    xd->above_context = pc->above_context;
                    xd->left_context = &mb_row_left_context;
                    vpx_memset(&mb_row_left_context, 0, sizeof(mb_row_left_context));
                    xd->up_available = (mb_row != 0);

                    xd->mb_to_top_edge = -((mb_row * 16)) << 3;
                    xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;

                    for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
                    {
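                        /* Stay at least sync_range columns behind the row
                         * above: test only at sync_range boundaries, and spin
                         * (pause hint plus a scheduler yield) until the row
                         * above has advanced far enough or has finished.
                         */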
                        if ((mb_col & (nsync - 1)) == 0)
                        {
                            while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
                            {
                                x86_pause_hint();
                                thread_sleep(0);
                            }
                        }

                        update_blockd_bmi(xd);

                        /* Distance of MB to the various image edges.
                         * These are specified to 8th pel as they are always
                         * compared to values that are in 1/8th pel units.
                         */
                        xd->mb_to_left_edge = -((mb_col * 16) << 3);
                        xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;

#if CONFIG_ERROR_CONCEALMENT
                        if (pbi->ec_enabled &&
                            (xd->mode_info_context->mbmi.ref_frame ==
                                                                 INTRA_FRAME) &&
                            vp8dx_bool_error(xd->current_bc))
                        {
                            /* We have an intra block with corrupt coefficients,
                             * better to conceal with an inter block.
                             * Interpolate MVs from neighboring MBs.
                             *
                             * Note that for the first mb with corrupt residual
                             * in a frame, we might not discover that before
                             * decoding the residual. That happens after this
                             * check, and therefore no inter concealment will be
                             * done.
                             */
                            vp8_interpolate_motion(xd,
                                                   mb_row, mb_col,
                                                   pc->mb_rows, pc->mb_cols,
                                                   pc->mode_info_stride);
                        }
#endif


                        xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
                        xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
                        xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;

                        xd->left_available = (mb_col != 0);

                        /* Select the appropriate reference frame for this MB */
                        if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
                            ref_fb_idx = pc->lst_fb_idx;
                        else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
                            ref_fb_idx = pc->gld_fb_idx;
                        else
                            ref_fb_idx = pc->alt_fb_idx;

                        xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
                        xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
                        xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;

                        vp8_build_uvmvs(xd, pc->full_pixel);
                        decode_macroblock(pbi, xd, mb_row, mb_col);

                        if (pbi->common.filter_level)
                        {
                            int skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
                                           xd->mode_info_context->mbmi.mode != SPLITMV &&
                                           xd->mode_info_context->mbmi.mb_skip_coeff);

                            const int mode_index = lfi_n->mode_lf_lut[xd->mode_info_context->mbmi.mode];
                            const int seg = xd->mode_info_context->mbmi.segment_id;
                            const int ref_frame = xd->mode_info_context->mbmi.ref_frame;

                            filter_level = lfi_n->lvl[seg][ref_frame][mode_index];

                            if (mb_row != pc->mb_rows - 1)
                            {
                                /* Save decoded MB last row data for next-row decoding */
                                vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col * 16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
                                vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col * 8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
                                vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col * 8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
                            }

                            /* save left_col for next MB decoding */
                            if (mb_col != pc->mb_cols - 1)
                            {
                                MODE_INFO *next = xd->mode_info_context + 1;

                                if (next->mbmi.ref_frame == INTRA_FRAME)
                                {
                                    for (i = 0; i < 16; i++)
                                        pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer[i * recon_y_stride + 15];
                                    for (i = 0; i < 8; i++)
                                    {
                                        pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer[i * recon_uv_stride + 7];
                                        pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer[i * recon_uv_stride + 7];
                                    }
                                }
                            }

                            /* loopfilter on this macroblock. */
                            if (filter_level)
                            {
                                if (pc->filter_type == NORMAL_LOOPFILTER)
                                {
                                    loop_filter_info lfi;
                                    FRAME_TYPE frame_type = pc->frame_type;
                                    const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
                                    lfi.mblim = lfi_n->mblim[filter_level];
                                    lfi.blim = lfi_n->blim[filter_level];
                                    lfi.lim = lfi_n->lim[filter_level];
                                    lfi.hev_thr = lfi_n->hev_thr[hev_index];

                                    if (mb_col > 0)
                                        LF_INVOKE(&pc->rtcd.loopfilter, normal_mb_v)
                                        (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);

                                    if (!skip_lf)
                                        LF_INVOKE(&pc->rtcd.loopfilter, normal_b_v)
                                        (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);

                                    /* don't apply across umv border */
                                    if (mb_row > 0)
                                        LF_INVOKE(&pc->rtcd.loopfilter, normal_mb_h)
                                        (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);

                                    if (!skip_lf)
                                        LF_INVOKE(&pc->rtcd.loopfilter, normal_b_h)
                                        (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
                                }
                                else
                                {
                                    if (mb_col > 0)
                                        LF_INVOKE(&pc->rtcd.loopfilter, simple_mb_v)
                                        (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]);

                                    if (!skip_lf)
                                        LF_INVOKE(&pc->rtcd.loopfilter, simple_b_v)
                                        (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]);

                                    /* don't apply across umv border */
                                    if (mb_row > 0)
                                        LF_INVOKE(&pc->rtcd.loopfilter, simple_mb_h)
                                        (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]);

                                    if (!skip_lf)
                                        LF_INVOKE(&pc->rtcd.loopfilter, simple_b_h)
                                        (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]);
                                }
                            }

                        }

                        recon_yoffset += 16;
                        recon_uvoffset += 8;

                        ++xd->mode_info_context;  /* next mb */

                        xd->above_context++;

                        /*pbi->mb_row_di[ithread].current_mb_col = mb_col;*/
                        pbi->mt_current_mb_col[mb_row] = mb_col;
                    }

                    /* adjust to the next row of mbs */
                    if (pbi->common.filter_level)
                    {
                        if (mb_row != pc->mb_rows - 1)
                        {
                            int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
                            int lastuv = (pc->yv12_fb[ref_fb_idx].y_width >> 1) + (VP8BORDERINPIXELS >> 1);

                            for (i = 0; i < 4; i++)
                            {
                                pbi->mt_yabove_row[mb_row + 1][lasty + i] = pbi->mt_yabove_row[mb_row + 1][lasty - 1];
                                pbi->mt_uabove_row[mb_row + 1][lastuv + i] = pbi->mt_uabove_row[mb_row + 1][lastuv - 1];
                                pbi->mt_vabove_row[mb_row + 1][lastuv + i] = pbi->mt_vabove_row[mb_row + 1][lastuv - 1];
                            }
                        }
                    }
                    else
                        vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);

                    ++xd->mode_info_context;      /* skip prediction column */

                    /* since we have multithread */
                    xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
                }
            }
        }
        /* Signal end-of-frame decoding when this worker has just finished the
         * frame's last worker-owned row: the final row, or the row above it
         * when mb_rows % (decoding_thread_count + 1) == 1, in which case the
         * final row belongs to the main thread.
         */
        if ((mbrd->mb_row == pbi->common.mb_rows - 1) ||
            ((mbrd->mb_row == pbi->common.mb_rows - 2) &&
             (pbi->common.mb_rows % (pbi->decoding_thread_count + 1)) == 1))
        {
            /*SetEvent(pbi->h_event_end_decoding);*/
            sem_post(&pbi->h_event_end_decoding);
        }
    }

    return 0;
}


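/* Create the worker thread pool. The thread count is capped by the requested
 * maximum, by 8 (the largest number of token partitions a VP8 stream can
 * carry) and by the available cores; the calling thread decodes too, so only
 * core_count - 1 workers are spawned.
 */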
void vp8_decoder_create_threads(VP8D_COMP *pbi)
{
    int core_count = 0;
    int ithread;

    pbi->b_multithreaded_rd = 0;
    pbi->allocated_decoding_thread_count = 0;

    /* limit decoding threads to the max number of token partitions */
    core_count = (pbi->max_threads > 8) ? 8 : pbi->max_threads;

    /* limit decoding threads to the available cores */
    if (core_count > pbi->common.processor_core_count)
        core_count = pbi->common.processor_core_count;

    if (core_count > 1)
    {
        pbi->b_multithreaded_rd = 1;
        pbi->decoding_thread_count = core_count - 1;

        CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
        CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
        CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count));
        vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count);
        CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count));

        for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)
        {
            sem_init(&pbi->h_event_start_decoding[ithread], 0, 0);

            pbi->de_thread_data[ithread].ithread  = ithread;
            pbi->de_thread_data[ithread].ptr1     = (void *)pbi;
            pbi->de_thread_data[ithread].ptr2     = (void *)&pbi->mb_row_di[ithread];

            pthread_create(&pbi->h_decoding_thread[ithread], 0, thread_decoding_proc, (&pbi->de_thread_data[ithread]));
        }

        sem_init(&pbi->h_event_end_decoding, 0, 0);

        pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
    }
}


void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
{
    int i;

    if (pbi->b_multithreaded_rd)
    {
        vpx_free(pbi->mt_current_mb_col);
        pbi->mt_current_mb_col = NULL;

        /* Free above_row buffers. */
        if (pbi->mt_yabove_row)
        {
            for (i = 0; i < mb_rows; i++)
            {
                vpx_free(pbi->mt_yabove_row[i]);
                pbi->mt_yabove_row[i] = NULL;
            }
            vpx_free(pbi->mt_yabove_row);
            pbi->mt_yabove_row = NULL;
        }

        if (pbi->mt_uabove_row)
        {
            for (i = 0; i < mb_rows; i++)
            {
                vpx_free(pbi->mt_uabove_row[i]);
                pbi->mt_uabove_row[i] = NULL;
            }
            vpx_free(pbi->mt_uabove_row);
            pbi->mt_uabove_row = NULL;
        }

        if (pbi->mt_vabove_row)
        {
            for (i = 0; i < mb_rows; i++)
            {
                vpx_free(pbi->mt_vabove_row[i]);
                pbi->mt_vabove_row[i] = NULL;
            }
            vpx_free(pbi->mt_vabove_row);
            pbi->mt_vabove_row = NULL;
        }

        /* Free left_col buffers. */
        if (pbi->mt_yleft_col)
        {
            for (i = 0; i < mb_rows; i++)
            {
                vpx_free(pbi->mt_yleft_col[i]);
                pbi->mt_yleft_col[i] = NULL;
            }
            vpx_free(pbi->mt_yleft_col);
            pbi->mt_yleft_col = NULL;
        }

        if (pbi->mt_uleft_col)
        {
            for (i = 0; i < mb_rows; i++)
            {
                vpx_free(pbi->mt_uleft_col[i]);
                pbi->mt_uleft_col[i] = NULL;
            }
            vpx_free(pbi->mt_uleft_col);
            pbi->mt_uleft_col = NULL;
        }

        if (pbi->mt_vleft_col)
        {
            for (i = 0; i < mb_rows; i++)
            {
                vpx_free(pbi->mt_vleft_col[i]);
                pbi->mt_vleft_col[i] = NULL;
            }
            vpx_free(pbi->mt_vleft_col);
            pbi->mt_vleft_col = NULL;
        }
    }
}


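/* Allocate the multithreading scratch buffers. mt_current_mb_col[] holds each
 * row's decode progress for row-to-row synchronization. The mt_*above_row[]
 * and mt_*left_col[] buffers hold pre-loop-filter reconstruction pixels: each
 * macroblock saves its bottom pixel row (for the MB row below) and its right
 * pixel column (for the next MB in the row) before the loop filter modifies
 * them, so intra prediction can still see unfiltered neighbors.
 */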
void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
{
    VP8_COMMON *const pc = &pbi->common;
    int i;
    int uv_width;

    if (pbi->b_multithreaded_rd)
    {
        vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);

        /* our internal buffers are always multiples of 16 */
        if ((width & 0xf) != 0)
            width += 16 - (width & 0xf);

        if (width < 640) pbi->sync_range = 1;
        else if (width <= 1280) pbi->sync_range = 8;
        else if (width <= 2560) pbi->sync_range = 16;
        else pbi->sync_range = 32;

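        /* sync_range is the column granularity at which rows publish and
         * check progress; wider frames use a coarser granularity so rows
         * synchronize less often. It must remain a power of two because the
         * decode loops test (mb_col & (nsync - 1)) == 0.
         */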
        uv_width = width >> 1;

        /* Allocate an int for each mb row. */
        CHECK_MEM_ERROR(pbi->mt_current_mb_col, vpx_malloc(sizeof(int) * pc->mb_rows));

        /* Allocate memory for above_row buffers. */
        CHECK_MEM_ERROR(pbi->mt_yabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_calloc(sizeof(unsigned char) * (width + (VP8BORDERINPIXELS << 1)), 1));

        CHECK_MEM_ERROR(pbi->mt_uabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));

        CHECK_MEM_ERROR(pbi->mt_vabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));

        /* Allocate memory for left_col buffers. */
        CHECK_MEM_ERROR(pbi->mt_yleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1));

        CHECK_MEM_ERROR(pbi->mt_uleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));

        CHECK_MEM_ERROR(pbi->mt_vleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
    }
}


void vp8_decoder_remove_threads(VP8D_COMP *pbi)
{
    /* Shut down the MB decoding threads. */
    if (pbi->b_multithreaded_rd)
    {
        int i;

        pbi->b_multithreaded_rd = 0;

        /* allow all threads to exit */
        for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
        {
            sem_post(&pbi->h_event_start_decoding[i]);
            pthread_join(pbi->h_decoding_thread[i], NULL);
        }

        for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
        {
            sem_destroy(&pbi->h_event_start_decoding[i]);
        }

        sem_destroy(&pbi->h_event_end_decoding);

        vpx_free(pbi->h_decoding_thread);
        pbi->h_decoding_thread = NULL;

        vpx_free(pbi->h_event_start_decoding);
        pbi->h_event_start_decoding = NULL;

        vpx_free(pbi->mb_row_di);
        pbi->mb_row_di = NULL;

        vpx_free(pbi->de_thread_data);
        pbi->de_thread_data = NULL;
    }
}

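/* Multithreaded frame decode, run on the main thread: initialize the border
 * and progress buffers, release the workers, decode this thread's share of
 * the macroblock rows, then wait for the frame to complete.
 */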
void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd)
{
    int mb_row;
    VP8_COMMON *pc = &pbi->common;

    int num_part = 1 << pbi->common.multi_token_partition;
    int i;
    volatile int *last_row_current_mb_col = NULL;
    int nsync = pbi->sync_range;

    int filter_level = pc->filter_level;
    loop_filter_info_n *lfi_n = &pc->lf_info;

    if (filter_level)
    {
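        /* Off-frame prediction samples default to 127 above the frame and
         * 129 left of it, as the VP8 format specifies; seed the saved-row
         * and saved-column buffers accordingly.
         */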
        /* Set above_row buffer to 127 for decoding first MB row */
        vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS - 1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5);
        vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS >> 1) - 1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width >> 1) + 5);
        vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS >> 1) - 1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width >> 1) + 5);

        for (i = 1; i < pc->mb_rows; i++)
        {
            vpx_memset(pbi->mt_yabove_row[i] + VP8BORDERINPIXELS - 1, (unsigned char)129, 1);
            vpx_memset(pbi->mt_uabove_row[i] + (VP8BORDERINPIXELS >> 1) - 1, (unsigned char)129, 1);
            vpx_memset(pbi->mt_vabove_row[i] + (VP8BORDERINPIXELS >> 1) - 1, (unsigned char)129, 1);
        }

        /* Set left_col to 129 initially */
        for (i = 0; i < pc->mb_rows; i++)
        {
            vpx_memset(pbi->mt_yleft_col[i], (unsigned char)129, 16);
            vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8);
            vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8);
        }

        /* Initialize the loop filter for this frame. */
        vp8_loop_filter_frame_init(pc, &pbi->mb, filter_level);
    }

    setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);

    for (i = 0; i < pbi->decoding_thread_count; i++)
        sem_post(&pbi->h_event_start_decoding[i]);

    for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
    {
        xd->current_bc = &pbi->mbc[mb_row % num_part];

        /* vp8_decode_mb_row(pbi, pc, mb_row, xd); */
        {
            int i;
            int recon_yoffset, recon_uvoffset;
            int mb_col;
            int ref_fb_idx = pc->lst_fb_idx;
            int dst_fb_idx = pc->new_fb_idx;
            int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
            int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;

            if (mb_row > 0)
                last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row - 1];

            vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
            recon_yoffset = mb_row * recon_y_stride * 16;
            recon_uvoffset = mb_row * recon_uv_stride * 8;
            /* reset above block coeffs */

            xd->above_context = pc->above_context;
            xd->up_available = (mb_row != 0);

            xd->mb_to_top_edge = -((mb_row * 16)) << 3;
            xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;

            for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
            {
                if (mb_row > 0 && (mb_col & (nsync - 1)) == 0)
                {
                    while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
                    {
                        x86_pause_hint();
                        thread_sleep(0);
                    }
                }

                update_blockd_bmi(xd);

                /* Distance of MB to the various image edges.
                 * These are specified to 8th pel as they are always compared to
                 * values that are in 1/8th pel units.
                 */
                xd->mb_to_left_edge = -((mb_col * 16) << 3);
                xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;

#if CONFIG_ERROR_CONCEALMENT
                if (pbi->ec_enabled &&
                    (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
                    vp8dx_bool_error(xd->current_bc))
                {
                    /* We have an intra block with corrupt coefficients, better
                     * to conceal with an inter block. Interpolate MVs from
                     * neighboring MBs.
                     *
                     * Note that for the first mb with corrupt residual in a
                     * frame, we might not discover that before decoding the
                     * residual. That happens after this check, and therefore no
                     * inter concealment will be done.
                     */
                    vp8_interpolate_motion(xd,
                                           mb_row, mb_col,
                                           pc->mb_rows, pc->mb_cols,
                                           pc->mode_info_stride);
                }
#endif


                xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
                xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
                xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;

                xd->left_available = (mb_col != 0);

                /* Select the appropriate reference frame for this MB */
                if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
                    ref_fb_idx = pc->lst_fb_idx;
                else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
                    ref_fb_idx = pc->gld_fb_idx;
                else
                    ref_fb_idx = pc->alt_fb_idx;

                xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
                xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
                xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;

                if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME)
                {
                    /* propagate errors from reference frames */
                    xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted;
                }

                vp8_build_uvmvs(xd, pc->full_pixel);
                decode_macroblock(pbi, xd, mb_row, mb_col);

                /* check if the boolean decoder has suffered an error */
                xd->corrupted |= vp8dx_bool_error(xd->current_bc);

                if (pbi->common.filter_level)
                {
                    int skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
                                   xd->mode_info_context->mbmi.mode != SPLITMV &&
                                   xd->mode_info_context->mbmi.mb_skip_coeff);

                    const int mode_index = lfi_n->mode_lf_lut[xd->mode_info_context->mbmi.mode];
                    const int seg = xd->mode_info_context->mbmi.segment_id;
                    const int ref_frame = xd->mode_info_context->mbmi.ref_frame;

                    filter_level = lfi_n->lvl[seg][ref_frame][mode_index];

                    /* Save decoded MB last row data for next-row decoding */
                    if (mb_row != pc->mb_rows - 1)
                    {
                        vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col * 16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
                        vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col * 8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
                        vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col * 8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
                    }

                    /* save left_col for next MB decoding */
                    if (mb_col != pc->mb_cols - 1)
                    {
                        MODE_INFO *next = xd->mode_info_context + 1;

                        if (next->mbmi.ref_frame == INTRA_FRAME)
                        {
                            for (i = 0; i < 16; i++)
                                pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer[i * recon_y_stride + 15];
                            for (i = 0; i < 8; i++)
                            {
                                pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer[i * recon_uv_stride + 7];
                                pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer[i * recon_uv_stride + 7];
                            }
                        }
                    }

                    /* loopfilter on this macroblock. */
                    if (filter_level)
                    {
                        if (pc->filter_type == NORMAL_LOOPFILTER)
                        {
                            loop_filter_info lfi;
                            FRAME_TYPE frame_type = pc->frame_type;
                            const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
                            lfi.mblim = lfi_n->mblim[filter_level];
                            lfi.blim = lfi_n->blim[filter_level];
                            lfi.lim = lfi_n->lim[filter_level];
                            lfi.hev_thr = lfi_n->hev_thr[hev_index];

                            if (mb_col > 0)
                                LF_INVOKE(&pc->rtcd.loopfilter, normal_mb_v)
                                (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);

                            if (!skip_lf)
                                LF_INVOKE(&pc->rtcd.loopfilter, normal_b_v)
                                (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);

                            /* don't apply across umv border */
                            if (mb_row > 0)
                                LF_INVOKE(&pc->rtcd.loopfilter, normal_mb_h)
                                (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);

                            if (!skip_lf)
                                LF_INVOKE(&pc->rtcd.loopfilter, normal_b_h)
                                (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
                        }
                        else
                        {
                            if (mb_col > 0)
                                LF_INVOKE(&pc->rtcd.loopfilter, simple_mb_v)
                                (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]);

                            if (!skip_lf)
                                LF_INVOKE(&pc->rtcd.loopfilter, simple_b_v)
                                (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]);

                            /* don't apply across umv border */
                            if (mb_row > 0)
                                LF_INVOKE(&pc->rtcd.loopfilter, simple_mb_h)
                                (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]);

                            if (!skip_lf)
                                LF_INVOKE(&pc->rtcd.loopfilter, simple_b_h)
                                (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]);
                        }
                    }

                }
                recon_yoffset += 16;
                recon_uvoffset += 8;

                ++xd->mode_info_context;  /* next mb */

                xd->above_context++;

                pbi->mt_current_mb_col[mb_row] = mb_col;
            }

            /* adjust to the next row of mbs */
            if (pbi->common.filter_level)
            {
                if (mb_row != pc->mb_rows - 1)
                {
                    int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
                    int lastuv = (pc->yv12_fb[ref_fb_idx].y_width >> 1) + (VP8BORDERINPIXELS >> 1);

                    for (i = 0; i < 4; i++)
                    {
                        pbi->mt_yabove_row[mb_row + 1][lasty + i] = pbi->mt_yabove_row[mb_row + 1][lasty - 1];
                        pbi->mt_uabove_row[mb_row + 1][lastuv + i] = pbi->mt_uabove_row[mb_row + 1][lastuv - 1];
                        pbi->mt_vabove_row[mb_row + 1][lastuv + i] = pbi->mt_vabove_row[mb_row + 1][lastuv - 1];
                    }
                }
            }
            else
                vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);

            ++xd->mode_info_context;      /* skip prediction column */
        }
        xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
    }

    sem_wait(&pbi->h_event_end_decoding);   /* wait until the frame's last MB row has been decoded */
}