2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
12 #if !defined(WIN32) && CONFIG_OS_SUPPORT == 1
15 #include "onyxd_int.h"
16 #include "vpx_mem/vpx_mem.h"
17 #include "vp8/common/threading.h"
19 #include "vp8/common/loopfilter.h"
20 #include "vp8/common/extend.h"
21 #include "vpx_ports/vpx_timer.h"
22 #include "detokenize.h"
23 #include "vp8/common/reconinter.h"
24 #include "reconintra_mt.h"
/* Prototypes for helpers whose definitions are not part of this listing. */
26 extern void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);
27 extern void clamp_mvs(MACROBLOCKD *xd);
28 extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
/* RTCD_VTABLE(x) selects member x of the rtcd table.  The first definition
 * expands to the address of (pbi)->common.rtcd.x, so a variable called pbi
 * must be in scope wherever the macro is used; the second definition
 * expands to NULL.
 * NOTE(review): the lines that would separate and close the two definitions
 * (an else / endif pair, presumably) are not visible in this listing -
 * confirm against the original file. */
30 #if CONFIG_RUNTIME_CPU_DETECT
31 #define RTCD_VTABLE(x) (&(pbi)->common.rtcd.x)
33 #define RTCD_VTABLE(x) NULL
/* setup_decoding_thread_data - copy per-frame decoder state into each worker's
 * private MACROBLOCKD before the worker threads are released.
 *
 * pbi   : decoder instance; supplies pbi->common, pbi->bc2 and
 *         pbi->mt_current_mb_col.
 * xd    : source MACROBLOCKD; its predictor function pointers, segmentation
 *         data, loop-filter deltas and per-block dequant members are copied.
 * mbrd  : array of per-thread row decoders; mbrd[i].mbd is written.
 * count : number of mbrd entries to initialise.
 *
 * Returns nothing.
 * NOTE(review): this listing omits brace lines and short statements (the
 * declarations of i and j among them), so loop nesting has to be confirmed
 * against the original file.
 */
36 static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
38 VP8_COMMON *const pc = & pbi->common;
/* One iteration per worker slot. */
41 for (i = 0; i < count; i++)
43 MACROBLOCKD *mbd = &mbrd[i].mbd;
/* NOTE(review): the statement guarded by this conditional, and the line
 * closing it, are not visible in this listing. */
44 #if CONFIG_RUNTIME_CPU_DETECT
/* Share the sub-pixel prediction function pointers held by xd. */
47 mbd->subpixel_predict = xd->subpixel_predict;
48 mbd->subpixel_predict8x4 = xd->subpixel_predict8x4;
49 mbd->subpixel_predict8x8 = xd->subpixel_predict8x8;
50 mbd->subpixel_predict16x16 = xd->subpixel_predict16x16;
/* Slot i starts (i + 1) mode-info strides past pc->mi. */
52 mbd->mode_info_context = pc->mi + pc->mode_info_stride * (i + 1);
53 mbd->mode_info_stride = pc->mode_info_stride;
/* Frame-level bookkeeping copied from the common state. */
55 mbd->frame_type = pc->frame_type;
56 mbd->frames_since_golden = pc->frames_since_golden;
57 mbd->frames_till_alt_ref_frame = pc->frames_till_alt_ref_frame;
/* Reference (pre) and destination (dst) buffer descriptors are copied by
 * value from the frame-buffer table. */
59 mbd->pre = pc->yv12_fb[pc->lst_fb_idx];
60 mbd->dst = pc->yv12_fb[pc->new_fb_idx];
62 vp8_setup_block_dptrs(mbd);
63 vp8_build_block_doffsets(mbd);
/* Mirror the segmentation state of xd. */
64 mbd->segmentation_enabled = xd->segmentation_enabled;
65 mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
66 vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
68 /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/
69 vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
70 /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/
71 vpx_memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
72 /*unsigned char mode_ref_lf_delta_enabled;
73 unsigned char mode_ref_lf_delta_update;*/
74 mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled;
75 mbd->mode_ref_lf_delta_update = xd->mode_ref_lf_delta_update;
/* Initial bitstream reader; thread_decoding_proc reassigns current_bc for
 * every row it decodes. */
77 mbd->current_bc = &pbi->bc2;
/* Copy the dequant member of each of the 25 blocks. */
79 for (j = 0; j < 25; j++)
81 mbd->block[j].dequant = xd->block[j].dequant;
/* Reset the per-row progress counters to -1 (presumably "no column decoded
 * yet"; the decode loops store the last finished column here).
 * NOTE(review): whether this loop sits after the worker loop above cannot be
 * seen from this listing - it reuses i, so confirm. */
85 for (i=0; i< pc->mb_rows; i++)
86 pbi->mt_current_mb_col[i]=-1;
/* decode_macroblock - reconstruct one macroblock for the multithreaded decoder.
 *
 * pbi    : decoder instance (dequant function table, common state).
 * xd     : macroblock descriptor; xd->mode_info_context selects the
 *          macroblock being decoded and xd->dst receives the result.
 * mb_row : macroblock row, forwarded to the vp8mt_* intra predictors.
 * mb_col : macroblock column, forwarded likewise.
 *
 * The visible statements read the tokens, build an intra or inter
 * prediction, then dequantise / inverse-transform the luma and chroma
 * residual and add it to the prediction.
 * NOTE(review): this listing omits brace, else and several short statement
 * lines (the declaration of eobtotal among them), so the exact branch
 * structure must be confirmed against the original file.
 */
90 static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col)
93 int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs;
94 VP8_COMMON *pc = &pbi->common;
/* Skipped macroblock: only the token contexts are reset. */
96 if (xd->mode_info_context->mbmi.mb_skip_coeff)
98 vp8_reset_mb_tokens_context(xd);
/* NOTE(review): presumably the else branch - the result of the token decode
 * is kept in eobtotal. */
102 eobtotal = vp8_decode_mb_tokens(pbi, xd);
/* NOTE(review): the statements that use do_clamp are not visible here. */
105 /* Perform temporary clamping of the MV to be used for prediction */
/* B_PRED and SPLITMV force eobtotal to be non-zero. */
111 eobtotal |= (xd->mode_info_context->mbmi.mode == B_PRED ||
112 xd->mode_info_context->mbmi.mode == SPLITMV);
/* NOTE(review): the test guarding the special case below, and the line that
 * closes the following comment, are not visible in this listing. */
115 /* Special case: Force the loopfilter to skip when eobtotal and
116 * mb_skip_coeff are zero.
118 xd->mode_info_context->mbmi.mb_skip_coeff = 1;
120 /*mt_skip_recon_mb(pbi, xd, mb_row, mb_col);*/
/* Prediction-only path: the "_s" intra predictors are used for intra
 * macroblocks; the inter 16x16 builder is handed the xd->dst buffers and
 * strides directly. */
121 if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
123 vp8mt_build_intra_predictors_mbuv_s(pbi, xd, mb_row, mb_col);
124 vp8mt_build_intra_predictors_mby_s(pbi, xd, mb_row, mb_col);
128 vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer,
129 xd->dst.u_buffer, xd->dst.v_buffer,
130 xd->dst.y_stride, xd->dst.uv_stride);
/* With segmentation enabled the dequantiser is set up again for this
 * macroblock. */
135 if (xd->segmentation_enabled)
136 mb_init_dequantizer(pbi, xd);
/* Build the prediction: intra on key frames or intra-coded macroblocks. */
139 if (xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
141 vp8mt_build_intra_predictors_mbuv(pbi, xd, mb_row, mb_col);
/* B_PRED luma is predicted per 4x4 block further down instead. */
143 if (xd->mode_info_context->mbmi.mode != B_PRED)
145 vp8mt_build_intra_predictors_mby(pbi, xd, mb_row, mb_col);
147 vp8mt_intra_prediction_down_copy(pbi, xd, mb_row, mb_col);
/* NOTE(review): presumably the else branch (inter prediction); the else
 * line is not visible. */
152 vp8_build_inter_predictors_mb(xd);
155 /* dequantization and idct */
/* Modes other than B_PRED / SPLITMV carry a second-order block (index 24). */
156 if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
158 BLOCKD *b = &xd->block[24];
159 DEQUANT_INVOKE(&pbi->dequant, block)(b);
161 /* do 2nd order transform on the dc block */
/* More than one coefficient: full inverse Walsh, then clear the first eight
 * int-sized words of qcoeff. */
162 if (xd->eobs[24] > 1)
164 IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
165 ((int *)b->qcoeff)[0] = 0;
166 ((int *)b->qcoeff)[1] = 0;
167 ((int *)b->qcoeff)[2] = 0;
168 ((int *)b->qcoeff)[3] = 0;
169 ((int *)b->qcoeff)[4] = 0;
170 ((int *)b->qcoeff)[5] = 0;
171 ((int *)b->qcoeff)[6] = 0;
172 ((int *)b->qcoeff)[7] = 0;
/* NOTE(review): presumably the else branch - single-coefficient transform,
 * only the first word is cleared. */
176 IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff);
177 ((int *)b->qcoeff)[0] = 0;
/* Luma residual; block 24's diff is passed as the last argument. */
180 DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block)
181 (xd->qcoeff, xd->block[0].dequant,
182 xd->predictor, xd->dst.y_buffer,
183 xd->dst.y_stride, xd->eobs, xd->block[24].diff);
/* Intra B_PRED: every 4x4 luma block is predicted immediately before its
 * residual is added. */
185 else if ((xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && xd->mode_info_context->mbmi.mode == B_PRED)
187 for (i = 0; i < 16; i++)
189 BLOCKD *b = &xd->block[i];
190 vp8mt_predict_intra4x4(pbi, xd, b->bmi.mode, b->predictor, mb_row, mb_col, i);
/* NOTE(review): the condition choosing between idct_add and
 * idct1_scalar_add is not visible in this listing. */
194 DEQUANT_INVOKE(&pbi->dequant, idct_add)
195 (b->qcoeff, b->dequant, b->predictor,
196 *(b->base_dst) + b->dst, 16, b->dst_stride);
200 IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
201 (b->qcoeff[0] * b->dequant[0], b->predictor,
202 *(b->base_dst) + b->dst, 16, b->dst_stride);
203 ((int *)b->qcoeff)[0] = 0;
/* NOTE(review): presumably the remaining else case - whole-luma
 * dequant/IDCT without a separate DC block. */
209 DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
210 (xd->qcoeff, xd->block[0].dequant,
211 xd->predictor, xd->dst.y_buffer,
212 xd->dst.y_stride, xd->eobs);
/* Chroma residual: coefficients and predictor start at offset 16*16, the
 * eobs and dequant entries at index 16. */
215 DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
216 (xd->qcoeff+16*16, xd->block[16].dequant,
217 xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer,
218 xd->dst.uv_stride, xd->eobs+16);
/* thread_decoding_proc - entry point of one worker decoding thread.
 *
 * p_data : DECODETHREAD_DATA for this worker; ithread is the worker index,
 *          ptr1 the VP8D_COMP and ptr2 this worker's MB_ROW_DEC.
 *
 * Once sem_wait() on h_event_start_decoding[ithread] succeeds, the worker
 * decodes macroblock rows ithread + 1, ithread + 1 + (decoding_thread_count
 * + 1), and so on.  For every macroblock it calls decode_macroblock() and,
 * when a filter level is set, saves edge pixels and runs the loop filter.
 * Progress is published in pbi->mt_current_mb_col[mb_row]; the row above is
 * polled through last_row_current_mb_col.  h_event_end_decoding is posted
 * when this worker handled the frame's final worker row.
 *
 * NOTE(review): this listing omits brace, else, break/return lines and many
 * short declarations and guards (the outer loop among them), so nesting and
 * the return value must be confirmed against the original file.
 */
222 static THREAD_FUNCTION thread_decoding_proc(void *p_data)
224 int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
225 VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
226 MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
227 ENTROPY_CONTEXT_PLANES mb_row_left_context;
/* Shutdown check: vp8_decoder_remove_threads() clears b_multithreaded_rd.
 * The statement executed when the test is true is not visible here. */
231 if (pbi->b_multithreaded_rd == 0)
234 /*if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)*/
/* Block until this worker's start semaphore is posted. */
235 if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0)
/* Re-check after waking: remove_threads posts the semaphore after clearing
 * the flag. */
237 if (pbi->b_multithreaded_rd == 0)
241 VP8_COMMON *pc = &pbi->common;
242 MACROBLOCKD *xd = &mbrd->mbd;
245 int num_part = 1 << pbi->common.multi_token_partition;
/* NOTE(review): volatile is the only qualifier on the shared progress
 * counter; confirm that this gives the ordering the decoder relies on. */
246 volatile int *last_row_current_mb_col;
247 int nsync = pbi->sync_range;
/* Rows are interleaved between the main thread and the workers. */
249 for (mb_row = ithread+1; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
252 int recon_yoffset, recon_uvoffset;
254 int ref_fb_idx = pc->lst_fb_idx;
255 int dst_fb_idx = pc->new_fb_idx;
256 int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
257 int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
260 loop_filter_info *lfi = pc->lf_info;
261 int alt_flt_enabled = xd->segmentation_enabled;
/* Record the row being decoded and pick the bitstream reader for it. */
264 pbi->mb_row_di[ithread].mb_row = mb_row;
265 pbi->mb_row_di[ithread].mbd.current_bc = &pbi->mbc[mb_row%num_part];
/* Progress counter of the row above; mb_row is at least 1 in this loop. */
267 last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1];
269 recon_yoffset = mb_row * recon_y_stride * 16;
270 recon_uvoffset = mb_row * recon_uv_stride * 8;
271 /* reset above block coeffs */
273 xd->above_context = pc->above_context;
/* The left entropy context is local to this thread and zeroed per row. */
274 xd->left_context = &mb_row_left_context;
275 vpx_memset(&mb_row_left_context, 0, sizeof(mb_row_left_context));
276 xd->up_available = (mb_row != 0);
/* NOTE(review): the unary minus is applied before the shift here, so a
 * negative value is left-shifted (undefined behaviour in ISO C); the
 * mb_to_left_edge assignment further down shifts first and negates
 * afterwards. */
278 xd->mb_to_top_edge = -((mb_row * 16)) << 3;
279 xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
281 for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
/* Every nsync columns, wait until the row above has either finished or
 * completed a column at least nsync ahead of mb_col.  The body of the wait
 * loop is not visible in this listing. */
283 if ((mb_col & (nsync-1)) == 0)
285 while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
/* Modes with per-block info: copy the 16 B_MODE_INFO entries into the
 * block array. */
292 if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
294 for (i = 0; i < 16; i++)
296 BLOCKD *d = &xd->block[i];
297 vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
/* The statements up to the reference-frame selection set the horizontal
 * edge distances, point dst at the frame buffer plus the recon offsets and
 * record whether a left neighbour exists. */
301 /* Distance of Mb to the various image edges.
302 * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
304 xd->mb_to_left_edge = -((mb_col * 16) << 3);
305 xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
307 xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
308 xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
309 xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
311 xd->left_available = (mb_col != 0);
313 /* Select the appropriate reference frame for this MB */
314 if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
315 ref_fb_idx = pc->lst_fb_idx;
316 else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
317 ref_fb_idx = pc->gld_fb_idx;
/* NOTE(review): presumably the final else (neither LAST nor GOLDEN); the
 * else line is not visible. */
319 ref_fb_idx = pc->alt_fb_idx;
321 xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
322 xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
323 xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
/* vp8_build_uvmvs is defined elsewhere; decode_macroblock reconstructs. */
325 vp8_build_uvmvs(xd, pc->full_pixel);
326 decode_macroblock(pbi, xd, mb_row, mb_col);
/* With a filter level set, edge pixels are copied out before the filter
 * calls below run (presumably for the vp8mt_* intra predictors). */
328 if (pbi->common.filter_level)
331 if( mb_row != pc->mb_rows-1 )
333 /* Save decoded MB last row data for next-row decoding */
334 vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
335 vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
336 vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
339 /* save left_col for next MB decoding */
340 if(mb_col != pc->mb_cols-1)
342 MODE_INFO *next = xd->mode_info_context +1;
/* Only saved when the next macroblock is intra or the frame is a key frame. */
344 if (xd->frame_type == KEY_FRAME || next->mbmi.ref_frame == INTRA_FRAME)
346 for (i = 0; i < 16; i++)
347 pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15];
348 for (i = 0; i < 8; i++)
350 pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7];
351 pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7];
356 /* update loopfilter info */
/* Segment 0 is used when segmentation is off; skip_lf is set for skipped
 * macroblocks that are neither B_PRED nor SPLITMV. */
357 Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
358 skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
359 xd->mode_info_context->mbmi.mode != SPLITMV &&
360 xd->mode_info_context->mbmi.mb_skip_coeff);
362 filter_level = pbi->mt_baseline_filter_level[Segment];
363 /* Distance of Mb to the various image edges.
364 * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
365 * Apply any context driven MB level adjustment
367 filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
369 /* loopfilter on this macroblock. */
/* NOTE(review): each of the four filter calls below is presumably guarded
 * by a condition that is not visible in this listing. */
373 pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
376 pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
378 /* don't apply across umv border */
380 pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
383 pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
390 ++xd->mode_info_context; /* next mb */
394 /*pbi->mb_row_di[ithread].current_mb_col = mb_col;*/
/* Publish progress for the thread decoding the next row. */
395 pbi->mt_current_mb_col[mb_row] = mb_col;
398 /* adjust to the next row of mbs */
399 if (pbi->common.filter_level)
401 if(mb_row != pc->mb_rows-1)
/* Replicate the last saved pixel into the four entries starting at
 * lasty / lastuv of the next row's "above" buffers. */
403 int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
404 int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1);
406 for (i = 0; i < 4; i++)
408 pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1];
409 pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1];
410 pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1];
/* NOTE(review): presumably the else branch taken when no filter level is
 * set; the else line is not visible. */
414 vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
416 ++xd->mode_info_context; /* skip prediction column */
418 /* since we have multithread */
419 xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
423 /* add this to each frame */
/* Signal the main thread when this worker's last recorded row is the final
 * row, or the second-to-last row while mb_rows % (decoding_thread_count + 1)
 * is 1 (in which case the final row belongs to the main thread's loop). */
424 if ((mbrd->mb_row == pbi->common.mb_rows-1) || ((mbrd->mb_row == pbi->common.mb_rows-2) && (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1))
426 /*SetEvent(pbi->h_event_end_decoding);*/
427 sem_post(&pbi->h_event_end_decoding);
/* vp8_decoder_create_threads - allocate per-thread state and start the
 * worker decoding threads.
 *
 * pbi : decoder instance; max_threads and common.processor_core_count bound
 *       the thread count, and the thread handles, semaphores, row decoders
 *       and thread-data records are stored in it.
 *
 * core_count - 1 workers are created, each running thread_decoding_proc.
 * NOTE(review): this listing omits brace lines, the declarations of
 * core_count and ithread, and the test that enables multithreading, so the
 * condition under which the block below runs must be confirmed.
 */
435 void vp8_decoder_create_threads(VP8D_COMP *pbi)
440 pbi->b_multithreaded_rd = 0;
441 pbi->allocated_decoding_thread_count = 0;
443 /* limit decoding threads to the max number of token partitions */
444 core_count = (pbi->max_threads > 8) ? 8 : pbi->max_threads;
446 /* limit decoding threads to the available cores */
447 if (core_count > pbi->common.processor_core_count)
448 core_count = pbi->common.processor_core_count;
/* One fewer worker than core_count is created. */
452 pbi->b_multithreaded_rd = 1;
453 pbi->decoding_thread_count = core_count - 1;
455 CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
456 CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
/* Row decoders are 32-byte aligned and zero-filled. */
457 CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count));
458 vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count);
459 CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count));
/* Per worker: a start semaphore initialised to 0, the argument record read
 * by thread_decoding_proc, then the thread itself.
 * NOTE(review): the results of sem_init and pthread_create are not checked
 * in the visible lines. */
461 for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)
463 sem_init(&pbi->h_event_start_decoding[ithread], 0, 0);
465 pbi->de_thread_data[ithread].ithread = ithread;
466 pbi->de_thread_data[ithread].ptr1 = (void *)pbi;
467 pbi->de_thread_data[ithread].ptr2 = (void *) &pbi->mb_row_di[ithread];
469 pthread_create(&pbi->h_decoding_thread[ithread], 0, thread_decoding_proc, (&pbi->de_thread_data[ithread]));
/* End-of-frame semaphore that vp8mt_decode_mb_rows waits on. */
472 sem_init(&pbi->h_event_end_decoding, 0, 0);
/* Count used by vp8_decoder_remove_threads when joining and destroying. */
474 pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
/* vp8mt_de_alloc_temp_buffers - release the multithreading scratch buffers.
 *
 * pbi     : decoder instance owning the mt_* buffers.
 * mb_rows : number of row entries to free in each per-row pointer table.
 *
 * Only acts when pbi->b_multithreaded_rd is set.  Every freed pointer is
 * reset to NULL.
 * NOTE(review): pc is initialised but not referenced in the visible lines;
 * brace lines and the declaration of i are not visible in this listing.
 */
479 void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
481 VP8_COMMON *const pc = & pbi->common;
484 if (pbi->b_multithreaded_rd)
/* Per-row progress counters. */
486 vpx_free(pbi->mt_current_mb_col);
487 pbi->mt_current_mb_col = NULL ;
489 /* Free above_row buffers. */
/* Each table: free every row entry, then the table itself. */
490 if (pbi->mt_yabove_row)
492 for (i=0; i< mb_rows; i++)
494 vpx_free(pbi->mt_yabove_row[i]);
495 pbi->mt_yabove_row[i] = NULL ;
497 vpx_free(pbi->mt_yabove_row);
498 pbi->mt_yabove_row = NULL ;
501 if (pbi->mt_uabove_row)
503 for (i=0; i< mb_rows; i++)
505 vpx_free(pbi->mt_uabove_row[i]);
506 pbi->mt_uabove_row[i] = NULL ;
508 vpx_free(pbi->mt_uabove_row);
509 pbi->mt_uabove_row = NULL ;
512 if (pbi->mt_vabove_row)
514 for (i=0; i< mb_rows; i++)
516 vpx_free(pbi->mt_vabove_row[i]);
517 pbi->mt_vabove_row[i] = NULL ;
519 vpx_free(pbi->mt_vabove_row);
520 pbi->mt_vabove_row = NULL ;
523 /* Free left_col buffers. */
524 if (pbi->mt_yleft_col)
526 for (i=0; i< mb_rows; i++)
528 vpx_free(pbi->mt_yleft_col[i]);
529 pbi->mt_yleft_col[i] = NULL ;
531 vpx_free(pbi->mt_yleft_col);
532 pbi->mt_yleft_col = NULL ;
535 if (pbi->mt_uleft_col)
537 for (i=0; i< mb_rows; i++)
539 vpx_free(pbi->mt_uleft_col[i]);
540 pbi->mt_uleft_col[i] = NULL ;
542 vpx_free(pbi->mt_uleft_col);
543 pbi->mt_uleft_col = NULL ;
546 if (pbi->mt_vleft_col)
548 for (i=0; i< mb_rows; i++)
550 vpx_free(pbi->mt_vleft_col[i]);
551 pbi->mt_vleft_col[i] = NULL ;
553 vpx_free(pbi->mt_vleft_col);
554 pbi->mt_vleft_col = NULL ;
/* vp8mt_alloc_temp_buffers - (re)allocate the multithreading scratch buffers.
 *
 * pbi          : decoder instance; pbi->common.mb_rows sizes the new tables.
 * width        : frame width in pixels; rounded up to a multiple of 16 here.
 * prev_mb_rows : row count passed to vp8mt_de_alloc_temp_buffers() to free
 *                the previous set of buffers.
 *
 * Only acts when pbi->b_multithreaded_rd is set.  Also chooses
 * pbi->sync_range from the rounded width.
 * NOTE(review): the per-row pointer tables come from vpx_malloc and are not
 * zeroed before their elements are filled; if CHECK_MEM_ERROR leaves this
 * function part-way through, a later free pass could see uninitialised
 * entries - confirm how CHECK_MEM_ERROR reports failure.
 * NOTE(review): brace lines and the declarations of i and uv_width are not
 * visible in this listing.
 */
560 void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
562 VP8_COMMON *const pc = & pbi->common;
566 if (pbi->b_multithreaded_rd)
/* Drop whatever was allocated for the previous frame size. */
568 vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);
570 /* our internal buffers are always multiples of 16 */
571 if ((width & 0xf) != 0)
572 width += 16 - (width & 0xf);
/* Wider frames use a larger sync_range: 1, 8, 16 or 32. */
574 if (width < 640) pbi->sync_range = 1;
575 else if (width <= 1280) pbi->sync_range = 8;
576 else if (width <= 2560) pbi->sync_range =16;
577 else pbi->sync_range = 32;
579 uv_width = width >>1;
581 /* Allocate an int for each mb row. */
582 CHECK_MEM_ERROR(pbi->mt_current_mb_col, vpx_malloc(sizeof(int) * pc->mb_rows));
584 /* Allocate memory for above_row buffers. */
/* Luma rows hold width + 2 * VP8BORDERINPIXELS bytes, chroma rows
 * uv_width + VP8BORDERINPIXELS bytes; each row is zero-filled by vpx_calloc. */
585 CHECK_MEM_ERROR(pbi->mt_yabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
586 for (i=0; i< pc->mb_rows; i++)
587 CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_calloc(sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1)), 1));
589 CHECK_MEM_ERROR(pbi->mt_uabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
590 for (i=0; i< pc->mb_rows; i++)
591 CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));
593 CHECK_MEM_ERROR(pbi->mt_vabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
594 for (i=0; i< pc->mb_rows; i++)
595 CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));
597 /* Allocate memory for left_col buffers. */
/* One column per row: 16 luma bytes and 8 bytes for each chroma plane. */
598 CHECK_MEM_ERROR(pbi->mt_yleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
599 for (i=0; i< pc->mb_rows; i++)
600 CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1));
602 CHECK_MEM_ERROR(pbi->mt_uleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
603 for (i=0; i< pc->mb_rows; i++)
604 CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
606 CHECK_MEM_ERROR(pbi->mt_vleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
607 for (i=0; i< pc->mb_rows; i++)
608 CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
/* vp8_decoder_remove_threads - stop the worker threads and free their state.
 *
 * pbi : decoder instance whose threads, semaphores and per-thread records
 *       were set up by vp8_decoder_create_threads().
 *
 * Does nothing unless pbi->b_multithreaded_rd is set.
 * NOTE(review): brace lines and the declaration of i are not visible in
 * this listing.
 */
613 void vp8_decoder_remove_threads(VP8D_COMP *pbi)
615 /* shutdown MB Decoding thread; */
616 if (pbi->b_multithreaded_rd)
/* Clear the flag first: thread_decoding_proc tests it after waking. */
620 pbi->b_multithreaded_rd = 0;
622 /* allow all threads to exit */
/* Wake each worker, then wait for it to finish. */
623 for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
625 sem_post(&pbi->h_event_start_decoding[i]);
626 pthread_join(pbi->h_decoding_thread[i], NULL);
/* Semaphores are destroyed only after every worker has been joined. */
629 for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
631 sem_destroy(&pbi->h_event_start_decoding[i]);
634 sem_destroy(&pbi->h_event_end_decoding);
/* Release the arrays allocated by vp8_decoder_create_threads(). */
636 vpx_free(pbi->h_decoding_thread);
637 pbi->h_decoding_thread = NULL;
639 vpx_free(pbi->h_event_start_decoding);
640 pbi->h_event_start_decoding = NULL;
642 vpx_free(pbi->mb_row_di);
643 pbi->mb_row_di = NULL ;
645 vpx_free(pbi->de_thread_data);
646 pbi->de_thread_data = NULL;
/* lpf_init - compute the per-segment baseline loop-filter levels for a frame
 * and (re)initialise the loop filter when its parameters changed.
 *
 * pbi              : decoder instance; results go to
 *                    pbi->mt_baseline_filter_level[].
 * default_filt_lvl : frame-level filter level used directly, or as the base
 *                    to which a segment delta is added.
 *
 * NOTE(review): this listing omits brace and else lines, the test on
 * alt_flt_enabled, the declaration of i, and the opening of a commented-out
 * declaration block, so the branch structure must be confirmed against the
 * original file.
 */
651 static void lpf_init( VP8D_COMP *pbi, int default_filt_lvl)
653 VP8_COMMON *cm = &pbi->common;
654 MACROBLOCKD *mbd = &pbi->mb;
655 /*YV12_BUFFER_CONFIG *post = &cm->new_frame;*/ /*frame_to_show;*/
656 loop_filter_info *lfi = cm->lf_info;
657 FRAME_TYPE frame_type = cm->frame_type;
661 int baseline_filter_level[MAX_MB_SEGMENTS];*/
662 int alt_flt_enabled = mbd->segmentation_enabled;
665 /*unsigned char *y_ptr, *u_ptr, *v_ptr;*/
667 /* Note the baseline filter values for each segment */
/* Segment data is either an absolute level or a delta added to
 * default_filt_lvl; the delta result is clamped to 0..MAX_LOOP_FILTER. */
670 for (i = 0; i < MAX_MB_SEGMENTS; i++)
673 if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
674 pbi->mt_baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
678 pbi->mt_baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
679 pbi->mt_baseline_filter_level[i] = (pbi->mt_baseline_filter_level[i] >= 0) ? ((pbi->mt_baseline_filter_level[i] <= MAX_LOOP_FILTER) ? pbi->mt_baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
/* NOTE(review): presumably the branch without segmentation - every segment
 * gets default_filt_lvl. */
685 for (i = 0; i < MAX_MB_SEGMENTS; i++)
686 pbi->mt_baseline_filter_level[i] = default_filt_lvl;
689 /* Initialize the loop filter for this frame. */
/* Full init when filter type or sharpness changed; otherwise only the
 * frame-type dependent init when the frame type changed. */
690 if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
691 vp8_init_loop_filter(cm);
692 else if (frame_type != cm->last_frame_type)
693 vp8_frame_init_loop_filter(lfi, frame_type);
/* vp8mt_decode_mb_rows - decode one frame using the worker threads plus the
 * calling thread.
 *
 * pbi : decoder instance (worker state, scratch buffers, semaphores).
 * xd  : macroblock descriptor used by the calling thread.
 *
 * Prepares the scratch buffers and loop filter when a filter level is set,
 * primes and releases the workers, then decodes rows 0,
 * decoding_thread_count + 1, ... itself with the same per-macroblock steps
 * as thread_decoding_proc, and finally waits on h_event_end_decoding.
 *
 * NOTE(review): this listing omits brace, else and many short declaration
 * and guard lines, so nesting must be confirmed against the original file.
 */
697 void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
700 VP8_COMMON *pc = &pbi->common;
703 int num_part = 1 << pbi->common.multi_token_partition;
705 volatile int *last_row_current_mb_col = NULL;
706 int nsync = pbi->sync_range;
709 loop_filter_info *lfi = pc->lf_info;
710 int alt_flt_enabled = xd->segmentation_enabled;
713 if(pbi->common.filter_level)
715 /* Set above_row buffer to 127 for decoding first MB row */
716 vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5);
717 vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
718 vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
/* For the other rows only the single byte just before the picture area is
 * set, to 129. */
720 for (i=1; i<pc->mb_rows; i++)
722 vpx_memset(pbi->mt_yabove_row[i] + VP8BORDERINPIXELS-1, (unsigned char)129, 1);
723 vpx_memset(pbi->mt_uabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
724 vpx_memset(pbi->mt_vabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
727 /* Set left_col to 129 initially */
728 for (i=0; i<pc->mb_rows; i++)
730 vpx_memset(pbi->mt_yleft_col[i], (unsigned char)129, 16);
731 vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8);
732 vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8);
734 lpf_init(pbi, pc->filter_level);
/* Copy frame state into each worker, then release the workers. */
737 setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
739 for (i = 0; i < pbi->decoding_thread_count; i++)
740 sem_post(&pbi->h_event_start_decoding[i]);
/* The calling thread takes row 0 and every (decoding_thread_count + 1)-th
 * row after it. */
742 for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
745 xd->current_bc = &pbi->mbc[mb_row%num_part];
747 /* vp8_decode_mb_row(pbi, pc, mb_row, xd); */
750 int recon_yoffset, recon_uvoffset;
752 int ref_fb_idx = pc->lst_fb_idx;
753 int dst_fb_idx = pc->new_fb_idx;
754 int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
755 int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
757 /* volatile int *last_row_current_mb_col = NULL; */
/* NOTE(review): for mb_row == 0 this forms the address of element -1; it is
 * only dereferenced under the mb_row > 0 test in the column loop below. */
759 last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1];
761 vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
762 recon_yoffset = mb_row * recon_y_stride * 16;
763 recon_uvoffset = mb_row * recon_uv_stride * 8;
764 /* reset above block coeffs */
766 xd->above_context = pc->above_context;
767 xd->up_available = (mb_row != 0);
/* NOTE(review): the unary minus is applied before the shift, so for
 * mb_row > 0 a negative value is left-shifted (undefined behaviour in
 * ISO C); the mb_to_left_edge assignment below shifts first. */
769 xd->mb_to_top_edge = -((mb_row * 16)) << 3;
770 xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
772 for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
/* Except on row 0, every nsync columns wait until the row above has either
 * finished or completed a column at least nsync ahead of mb_col.  The body
 * of the wait loop is not visible in this listing. */
774 if ( mb_row > 0 && (mb_col & (nsync-1)) == 0){
775 while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
/* Modes with per-block info: copy the 16 B_MODE_INFO entries into the
 * block array. */
782 if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
784 for (i = 0; i < 16; i++)
786 BLOCKD *d = &xd->block[i];
787 vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
/* The statements up to the reference-frame selection set the horizontal
 * edge distances, point dst at the frame buffer plus the recon offsets and
 * record whether a left neighbour exists. */
791 /* Distance of Mb to the various image edges.
792 * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
794 xd->mb_to_left_edge = -((mb_col * 16) << 3);
795 xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
797 xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
798 xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
799 xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
801 xd->left_available = (mb_col != 0);
803 /* Select the appropriate reference frame for this MB */
804 if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
805 ref_fb_idx = pc->lst_fb_idx;
806 else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
807 ref_fb_idx = pc->gld_fb_idx;
/* NOTE(review): presumably the final else (neither LAST nor GOLDEN); the
 * else line is not visible. */
809 ref_fb_idx = pc->alt_fb_idx;
811 xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
812 xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
813 xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
815 if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME)
817 /* propagate errors from reference frames */
818 xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted;
/* vp8_build_uvmvs is defined elsewhere; decode_macroblock reconstructs. */
821 vp8_build_uvmvs(xd, pc->full_pixel);
822 decode_macroblock(pbi, xd, mb_row, mb_col);
824 /* check if the boolean decoder has suffered an error */
825 xd->corrupted |= vp8dx_bool_error(xd->current_bc);
/* With a filter level set, edge pixels are copied out before the filter
 * calls below run (presumably for the vp8mt_* intra predictors). */
827 if (pbi->common.filter_level)
830 /* Save decoded MB last row data for next-row decoding */
831 if(mb_row != pc->mb_rows-1)
833 vpx_memcpy((pbi->mt_yabove_row[mb_row +1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
834 vpx_memcpy((pbi->mt_uabove_row[mb_row +1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
835 vpx_memcpy((pbi->mt_vabove_row[mb_row +1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
838 /* save left_col for next MB decoding */
839 if(mb_col != pc->mb_cols-1)
841 MODE_INFO *next = xd->mode_info_context +1;
/* Only saved when the next macroblock is intra or the frame is a key frame. */
843 if (xd->frame_type == KEY_FRAME || next->mbmi.ref_frame == INTRA_FRAME)
845 for (i = 0; i < 16; i++)
846 pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15];
847 for (i = 0; i < 8; i++)
849 pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7];
850 pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7];
855 /* update loopfilter info */
/* Segment 0 is used when segmentation is off; skip_lf is set for skipped
 * macroblocks that are neither B_PRED nor SPLITMV. */
856 Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
857 skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
858 xd->mode_info_context->mbmi.mode != SPLITMV &&
859 xd->mode_info_context->mbmi.mb_skip_coeff);
860 filter_level = pbi->mt_baseline_filter_level[Segment];
861 /* Distance of Mb to the various image edges.
862 * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
863 * Apply any context driven MB level adjustment
865 filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
867 /* loopfilter on this macroblock. */
/* NOTE(review): each of the four filter calls below is presumably guarded
 * by a condition that is not visible in this listing. */
871 pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
874 pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
876 /* don't apply across umv border */
878 pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
881 pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]);
888 ++xd->mode_info_context; /* next mb */
/* Publish progress for the thread decoding the next row. */
892 pbi->mt_current_mb_col[mb_row] = mb_col;
895 /* adjust to the next row of mbs */
896 if (pbi->common.filter_level)
898 if(mb_row != pc->mb_rows-1)
/* Replicate the last saved pixel into the four entries starting at
 * lasty / lastuv of the next row's "above" buffers. */
900 int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
901 int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1);
903 for (i = 0; i < 4; i++)
905 pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1];
906 pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1];
907 pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1];
/* NOTE(review): presumably the else branch taken when no filter level is
 * set; the else line is not visible. */
911 vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
913 ++xd->mode_info_context; /* skip prediction column */
/* Skip the mode-info rows that belong to the worker threads. */
915 xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
/* Wait for the worker that posts h_event_end_decoding. */
918 sem_wait(&pbi->h_event_end_decoding); /* add back for each frame */