2 * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
15 #include "onyxd_int.h"
16 #include "vpx_mem/vpx_mem.h"
17 #include "threading.h"
19 #include "loopfilter.h"
21 #include "vpx_ports/vpx_timer.h"
23 extern void vp8_decode_mb_row(VP8D_COMP *pbi,
28 extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
29 extern void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd);
// vp8_setup_decoding_thread_data: clone the main decoder's macroblock
// context (xd) into each of the 'count' worker-row contexts in mbrd[],
// so each decoding thread owns an independent MACROBLOCKD.
// NOTE(review): this listing has elided lines (braces, #else branches);
// comments below describe only the statements that are visible here.
31 void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
36 #if CONFIG_MULTITHREAD
37 VP8_COMMON *const pc = & pbi->common;
// One pass per worker context.
40 for (i = 0; i < count; i++)
42 MACROBLOCKD *mbd = &mbrd[i].mbd;
43 #if CONFIG_RUNTIME_CPU_DETECT
// Share the sub-pixel prediction function pointers chosen at init time.
48 mbd->subpixel_predict = xd->subpixel_predict;
49 mbd->subpixel_predict8x4 = xd->subpixel_predict8x4;
50 mbd->subpixel_predict8x8 = xd->subpixel_predict8x8;
51 mbd->subpixel_predict16x16 = xd->subpixel_predict16x16;
52 mbd->gf_active_ptr = xd->gf_active_ptr;
// Worker i starts (i + 1) mode-info rows below the frame top; the main
// thread decodes row 0 itself.
54 mbd->mode_info = pc->mi - 1;
55 mbd->mode_info_context = pc->mi + pc->mode_info_stride * (i + 1);
56 mbd->mode_info_stride = pc->mode_info_stride;
// Per-frame reference bookkeeping mirrored from the common state.
58 mbd->frame_type = pc->frame_type;
59 mbd->frames_since_golden = pc->frames_since_golden;
60 mbd->frames_till_alt_ref_frame = pc->frames_till_alt_ref_frame;
// Prediction source defaults to the last frame; output is the new frame.
62 mbd->pre = pc->last_frame;
63 mbd->dst = pc->new_frame;
68 vp8_setup_block_dptrs(mbd);
69 vp8_build_block_doffsets(mbd);
// Segmentation settings copied from the main context.
70 mbd->segmentation_enabled = xd->segmentation_enabled;
71 mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
72 vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
// Default prediction modes until the bitstream overrides them per MB.
74 mbd->mbmi.mode = DC_PRED;
75 mbd->mbmi.uv_mode = DC_PRED;
// All workers read residual data from the bc2 bool decoder here;
// vp8_mtdecode_mb_rows reassigns current_bc per row when partitions vary.
77 mbd->current_bc = &pbi->bc2;
// Share the dequantization tables for all 25 blocks of a macroblock.
79 for (j = 0; j < 25; j++)
81 mbd->block[j].dequant = xd->block[j].dequant;
// vp8_thread_decoding_proc: worker-thread entry point. Each worker sleeps
// on its own semaphore; when woken it decodes the macroblock row assigned
// in its MB_ROW_DEC, pacing itself one MB behind the row above via a
// volatile column counter, then signals the main thread when appropriate.
// NOTE(review): this listing has elided lines (braces, spin-wait bodies,
// #else branches, the extend-borders call arguments around line 225);
// comments describe only the statements that are visible here.
94 THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
96 #if CONFIG_MULTITHREAD
97 int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
98 VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
99 MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
// Private left entropy context for the row this thread decodes.
100 ENTROPY_CONTEXT mb_row_left_context[4][4];
// Run flag: cleared by vp8_decoder_remove_threads to request exit.
104 if (pbi->b_multithreaded_rd == 0)
107 //if(WaitForSingleObject(pbi->h_event_mbrdecoding[ithread], INFINITE) == WAIT_OBJECT_0)
// Block until the main thread hands this worker a row to decode.
108 if (sem_wait(&pbi->h_event_mbrdecoding[ithread]) == 0)
// Re-check after wakeup: a post during teardown means "exit", not "work".
110 if (pbi->b_multithreaded_rd == 0)
114 VP8_COMMON *pc = &pbi->common;
115 int mb_row = mbrd->mb_row;
116 MACROBLOCKD *xd = &mbrd->mbd;
118 //printf("ithread:%d mb_row %d\n", ithread, mb_row);
120 int recon_yoffset, recon_uvoffset;
122 int recon_y_stride = pc->last_frame.y_stride;
123 int recon_uv_stride = pc->last_frame.uv_stride;
// Progress counter of the row directly above; volatile because another
// thread writes it while this thread polls it.
125 volatile int *last_row_current_mb_col;
// Thread 0's predecessor is the main thread's row; thread i's is worker i-1.
128 last_row_current_mb_col = &pbi->mb_row_di[ithread-1].current_mb_col;
130 last_row_current_mb_col = &pbi->current_mb_col_main;
// Byte offsets of this row in the luma (16-line) / chroma (8-line) planes.
132 recon_yoffset = mb_row * recon_y_stride * 16;
133 recon_uvoffset = mb_row * recon_uv_stride * 8;
134 // reset above block coeffs
136 xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT];
137 xd->above_context[UCONTEXT ] = pc->above_context[UCONTEXT];
138 xd->above_context[VCONTEXT ] = pc->above_context[VCONTEXT];
139 xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT];
// Fresh (zeroed) left context at the start of the row.
140 xd->left_context = mb_row_left_context;
141 vpx_memset(mb_row_left_context, 0, sizeof(mb_row_left_context));
142 xd->up_available = (mb_row != 0);
// Edge distances in 1/8th-pel units (hence the << 3).
144 xd->mb_to_top_edge = -((mb_row * 16)) << 3;
145 xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
147 for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
// Spin until the row above has decoded past our column (stay at least
// one MB behind it) or has finished its row entirely.
150 while (mb_col > (*last_row_current_mb_col - 1) && *last_row_current_mb_col != pc->mb_cols - 1)
156 // Take a copy of the mode and Mv information for this macroblock into the xd->mbmi
157 vpx_memcpy(&xd->mbmi, &xd->mode_info_context->mbmi, 32); //sizeof(MB_MODE_INFO) );
// Per-subblock modes/MVs only exist for SPLITMV and B_PRED macroblocks.
159 if (xd->mbmi.mode == SPLITMV || xd->mbmi.mode == B_PRED)
161 for (i = 0; i < 16; i++)
163 BLOCKD *d = &xd->block[i];
164 vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
168 // Distance of Mb to the various image edges.
169 // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
170 xd->mb_to_left_edge = -((mb_col * 16) << 3);
171 xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
// Reconstruction destination for this macroblock.
173 xd->dst.y_buffer = pc->new_frame.y_buffer + recon_yoffset;
174 xd->dst.u_buffer = pc->new_frame.u_buffer + recon_uvoffset;
175 xd->dst.v_buffer = pc->new_frame.v_buffer + recon_uvoffset;
177 xd->left_available = (mb_col != 0);
179 // Select the appropriate reference frame for this MB
180 if (xd->mbmi.ref_frame == LAST_FRAME)
182 xd->pre.y_buffer = pc->last_frame.y_buffer + recon_yoffset;
183 xd->pre.u_buffer = pc->last_frame.u_buffer + recon_uvoffset;
184 xd->pre.v_buffer = pc->last_frame.v_buffer + recon_uvoffset;
186 else if (xd->mbmi.ref_frame == GOLDEN_FRAME)
188 // Golden frame reconstruction buffer
189 xd->pre.y_buffer = pc->golden_frame.y_buffer + recon_yoffset;
190 xd->pre.u_buffer = pc->golden_frame.u_buffer + recon_uvoffset;
191 xd->pre.v_buffer = pc->golden_frame.v_buffer + recon_uvoffset;
195 // Alternate reference frame reconstruction buffer
196 xd->pre.y_buffer = pc->alt_ref_frame.y_buffer + recon_yoffset;
197 xd->pre.u_buffer = pc->alt_ref_frame.u_buffer + recon_uvoffset;
198 xd->pre.v_buffer = pc->alt_ref_frame.v_buffer + recon_uvoffset;
// Derive chroma MVs from the luma MVs, then decode the macroblock.
201 vp8_build_uvmvs(xd, pc->full_pixel);
203 vp8dx_bool_decoder_fill(xd->current_bc);
204 vp8_decode_macroblock(pbi, xd);
210 ++xd->mode_info_context; /* next mb */
212 xd->gf_active_ptr++; // GF useage flag for next MB
// Advance the per-plane above-context pointers by one macroblock's worth.
214 xd->above_context[Y1CONTEXT] += 4;
215 xd->above_context[UCONTEXT ] += 2;
216 xd->above_context[VCONTEXT ] += 2;
217 xd->above_context[Y2CONTEXT] ++;
// Publish our column progress so the row below may advance.
218 pbi->mb_row_di[ithread].current_mb_col = mb_col;
222 // adjust to the next row of mbs
225 xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
228 ++xd->mode_info_context; /* skip prediction column */
230 // since we have multithread
// Jump over the rows that the other threads (and the main thread) decode.
231 xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
233 //memcpy(&pbi->lpfmb, &pbi->mb, sizeof(pbi->mb));
// Publish decode progress for the loop-filter thread on odd rows only —
// presumably to halve update traffic; TODO confirm intent.
234 if ((mb_row & 1) == 1)
236 pbi->last_mb_row_decoded = mb_row;
237 //printf("S%d", pbi->last_mb_row_decoded);
// The last worker of the group (or the one holding the final frame row)
// wakes the main thread waiting in vp8_mtdecode_mb_rows.
240 if (ithread == (pbi->decoding_thread_count - 1) || mb_row == pc->mb_rows - 1)
242 //SetEvent(pbi->h_event_main);
243 sem_post(&pbi->h_event_main);
// vp8_thread_loop_filter: loop-filter thread entry point. Sleeps on
// h_event_start_lpf; when woken (and not shutting down) it loop-filters
// the whole new frame, pacing itself behind the decode threads through the
// volatile last_mb_row_decoded counter, then posts h_event_lpf so that
// vp8_stop_lfthread can return.
// NOTE(review): this listing has elided lines (braces, #else branches,
// border-column/row conditions, per-column pointer advances); comments
// describe only the statements that are visible here.
257 THREAD_FUNCTION vp8_thread_loop_filter(void *p_data)
259 #if CONFIG_MULTITHREAD
260 VP8D_COMP *pbi = (VP8D_COMP *)p_data;
// Run flag: cleared by vp8_decoder_remove_threads to request exit.
264 if (pbi->b_multithreaded_lf == 0)
267 //printf("before waiting for start_lpf\n");
269 //if(WaitForSingleObject(pbi->h_event_start_lpf, INFINITE) == WAIT_OBJECT_0)
// Block until vp8_start_lfthread signals a frame is ready to filter.
270 if (sem_wait(&pbi->h_event_start_lpf) == 0)
272 if (pbi->b_multithreaded_lf == 0) // we're shutting down
277 VP8_COMMON *cm = &pbi->common;
// lpfmb is the snapshot of the decoder's MACROBLOCKD taken by
// vp8_start_lfthread at frame start.
278 MACROBLOCKD *mbd = &pbi->lpfmb;
279 int default_filt_lvl = pbi->common.filter_level;
281 YV12_BUFFER_CONFIG *post = &cm->new_frame;
282 loop_filter_info *lfi = cm->lf_info;
288 int baseline_filter_level[MAX_MB_SEGMENTS];
290 int alt_flt_enabled = mbd->segmentation_enabled;
293 unsigned char *y_ptr, *u_ptr, *v_ptr;
// Decode progress published by the decoding threads; volatile because it
// is written by other threads while this thread polls it below.
295 volatile int *last_mb_row_decoded = &pbi->last_mb_row_decoded;
297 //MODE_INFO * this_mb_mode_info = cm->mi;
298 mbd->mode_info_context = cm->mi; // Point at base of Mb MODE_INFO list
300 // Note the baseline filter values for each segment
303 for (i = 0; i < MAX_MB_SEGMENTS; i++)
// Segment data is either an absolute level or a delta on the default.
305 if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
306 baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
309 baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
310 baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; // Clamp to valid range
// With segmentation off, every segment uses the frame default level.
316 for (i = 0; i < MAX_MB_SEGMENTS; i++)
317 baseline_filter_level[i] = default_filt_lvl;
320 // Initialize the loop filter for this frame.
321 vp8_init_loop_filter(cm);
323 // Set up the buffer pointers
324 y_ptr = post->y_buffer;
325 u_ptr = post->u_buffer;
326 v_ptr = post->v_buffer;
328 // vp8_filter each macro block
329 for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
// Spin until the decoders have moved past the row we are about to filter.
332 while (mb_row >= *last_mb_row_decoded)
338 //printf("R%d", mb_row);
339 for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
341 int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
343 filter_level = baseline_filter_level[Segment];
345 // Apply any context driven MB level adjustment
346 vp8_adjust_mb_lf_value(mbd, &filter_level);
// Vertical (column) edges: macroblock edge first, then the inner block
// edges when the MB carries residual (dc_diff > 0).
351 cm->lf_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
353 if (mbd->mode_info_context->mbmi.dc_diff > 0)
354 cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
356 // don't apply across umv border
// Horizontal (row) edges, same MB-edge / inner-edge split as above.
358 cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
360 if (mbd->mode_info_context->mbmi.dc_diff > 0)
361 cm->lf_bh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
368 mbd->mode_info_context++; // step to next MB
// Move the plane pointers to the start of the next macroblock row
// (16 luma / 8 chroma lines down, minus the width already walked; the
// per-column advances are elided from this listing).
372 y_ptr += post->y_stride * 16 - post->y_width;
373 u_ptr += post->uv_stride * 8 - post->uv_width;
374 v_ptr += post->uv_stride * 8 - post->uv_width;
376 mbd->mode_info_context++; // Skip border mb
379 //printf("R%d\n", mb_row);
380 // When done, signal main thread that ME is finished
381 //SetEvent(pbi->h_event_lpf);
382 sem_post(&pbi->h_event_lpf);
// vp8_decoder_create_threads: spin up the loop-filter thread and
// (core_count - 1) macroblock-row decoding threads, with one wakeup
// semaphore per decode thread plus h_event_main for worker-to-main
// signalling. Called once at decoder init.
// NOTE(review): braces and the surrounding conditionals (e.g. the
// core_count > 1 guard implied by the numbering gaps) are elided from this
// listing; comments cover only the visible statements.
394 void vp8_decoder_create_threads(VP8D_COMP *pbi)
396 #if CONFIG_MULTITHREAD
// Start from a clean "no threads" state.
400 pbi->b_multithreaded_rd = 0;
401 pbi->b_multithreaded_lf = 0;
402 pbi->allocated_decoding_thread_count = 0;
// Cap the thread count at 16 regardless of what the caller asked for.
403 core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads; //vp8_get_proc_core_count();
// Loop-filter thread and its two semaphores (start-of-frame / done).
406 sem_init(&pbi->h_event_lpf, 0, 0);
407 sem_init(&pbi->h_event_start_lpf, 0, 0);
408 pbi->b_multithreaded_lf = 1;
409 pthread_create(&pbi->h_thread_lpf, 0, vp8_thread_loop_filter, (pbi));
// Row-decoding workers: the main thread keeps one row of each group for
// itself, so only core_count - 1 workers are created.
414 pbi->b_multithreaded_rd = 1;
415 pbi->decoding_thread_count = core_count - 1;
// Per-worker thread handles, semaphores, 32-byte-aligned row contexts
// (zeroed), and thread-argument packets.
417 CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
418 CHECK_MEM_ERROR(pbi->h_event_mbrdecoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
419 CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count));
420 vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count);
421 CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count));
423 for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)
// Each worker gets its own wakeup semaphore and a data packet carrying
// its index, the decoder instance, and its private row context.
425 sem_init(&pbi->h_event_mbrdecoding[ithread], 0, 0);
427 pbi->de_thread_data[ithread].ithread = ithread;
428 pbi->de_thread_data[ithread].ptr1 = (void *)pbi;
429 pbi->de_thread_data[ithread].ptr2 = (void *) &pbi->mb_row_di[ithread];
431 pthread_create(&pbi->h_decoding_thread[ithread], 0, vp8_thread_decoding_proc, (&pbi->de_thread_data[ithread]));
// Workers post h_event_main when their batch of rows is finished.
435 sem_init(&pbi->h_event_main, 0, 0);
436 pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
// vp8_decoder_remove_threads: shut down the loop-filter thread and all
// row-decoding threads, then destroy their semaphores and free the
// per-thread allocations. Safe counterpart of vp8_decoder_create_threads.
// NOTE(review): braces and some conditionals are elided from this listing;
// comments cover only the visible statements.
444 void vp8_decoder_remove_threads(VP8D_COMP *pbi)
446 #if CONFIG_MULTITHREAD
// Shutdown lpf thread.
448 if (pbi->b_multithreaded_lf)
// Clear the run flag first, then wake the thread so it observes the flag
// and exits before we join it.
450 pbi->b_multithreaded_lf = 0;
451 sem_post(&pbi->h_event_start_lpf);
452 pthread_join(pbi->h_thread_lpf, 0);
453 sem_destroy(&pbi->h_event_start_lpf);
456 //shutdown MB Decoding thread;
457 if (pbi->b_multithreaded_rd)
459 pbi->b_multithreaded_rd = 0;
460 // allow all threads to exit
// Same pattern as the lpf thread: post each worker's semaphore so it
// wakes, sees b_multithreaded_rd == 0, and returns; then join it.
464 for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
467 sem_post(&pbi->h_event_mbrdecoding[i]);
468 pthread_join(pbi->h_decoding_thread[i], NULL);
// Semaphores are only destroyed after every thread has been joined.
474 for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
476 sem_destroy(&pbi->h_event_mbrdecoding[i]);
482 sem_destroy(&pbi->h_event_main);
// Release the per-thread allocations and null the pointers so a repeated
// teardown cannot double-free.
484 if (pbi->h_decoding_thread)
486 vpx_free(pbi->h_decoding_thread);
487 pbi->h_decoding_thread = NULL;
490 if (pbi->h_event_mbrdecoding)
492 vpx_free(pbi->h_event_mbrdecoding);
493 pbi->h_event_mbrdecoding = NULL;
498 vpx_free(pbi->mb_row_di);
499 pbi->mb_row_di = NULL ;
502 if (pbi->de_thread_data)
504 vpx_free(pbi->de_thread_data);
505 pbi->de_thread_data = NULL;
// vp8_start_lfthread: snapshot the main decoder's macroblock context into
// pbi->lpfmb for the loop-filter thread, reset the frame's decode-progress
// counter, and wake the loop-filter thread for this frame.
515 void vp8_start_lfthread(VP8D_COMP *pbi)
517 #if CONFIG_MULTITHREAD
518 memcpy(&pbi->lpfmb, &pbi->mb, sizeof(pbi->mb));
// No rows decoded yet; the lpf thread polls this before filtering a row.
519 pbi->last_mb_row_decoded = 0;
520 sem_post(&pbi->h_event_start_lpf);
// vp8_stop_lfthread: block until the loop-filter thread posts h_event_lpf
// (frame fully filtered), accumulating the wait into the decoder's
// loop-filtering time statistic.
526 void vp8_stop_lfthread(VP8D_COMP *pbi)
528 #if CONFIG_MULTITHREAD
529 struct vpx_usec_timer timer;
531 vpx_usec_timer_start(&timer);
// Posted by vp8_thread_loop_filter when the whole frame is filtered.
533 sem_wait(&pbi->h_event_lpf);
535 vpx_usec_timer_mark(&timer);
536 pbi->time_loop_filtering += vpx_usec_timer_elapsed(&timer);
543 void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
546 #if CONFIG_MULTITHREAD
548 VP8_COMMON *pc = &pbi->common;
551 int num_part = 1 << pbi->common.multi_token_partition;
553 vp8_setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
555 for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
558 pbi->current_mb_col_main = -1;
560 xd->current_bc = &pbi->mbc[ibc];
566 for (i = 0; i < pbi->decoding_thread_count; i++)
568 if ((mb_row + i + 1) >= pc->mb_rows)
571 pbi->mb_row_di[i].mb_row = mb_row + i + 1;
572 pbi->mb_row_di[i].mbd.current_bc = &pbi->mbc[ibc];
578 pbi->mb_row_di[i].current_mb_col = -1;
579 sem_post(&pbi->h_event_mbrdecoding[i]);
582 vp8_decode_mb_row(pbi, pc, mb_row, xd);
584 xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
586 if (mb_row < pc->mb_rows - 1)
588 sem_wait(&pbi->h_event_main);
592 pbi->last_mb_row_decoded = mb_row;