Detect toolchain based on gcc -dumpmachine
[profile/ivi/libvpx.git] / vp8 / decoder / threading.c
1 /*
2  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license 
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may 
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10
11
12 #ifndef WIN32
13 # include <unistd.h>
14 #endif
15 #include "onyxd_int.h"
16 #include "vpx_mem/vpx_mem.h"
17 #include "threading.h"
18
19 #include "loopfilter.h"
20 #include "extend.h"
21 #include "vpx_ports/vpx_timer.h"
22
23 extern void vp8_decode_mb_row(VP8D_COMP *pbi,
24                               VP8_COMMON *pc,
25                               int mb_row,
26                               MACROBLOCKD *xd);
27
28 extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
29 extern void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd);
30
31 void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
32 {
33
34
35
36 #if CONFIG_MULTITHREAD
37     VP8_COMMON *const pc = & pbi->common;
38     int i, j;
39
40     for (i = 0; i < count; i++)
41     {
42         MACROBLOCKD *mbd = &mbrd[i].mbd;
43 #if CONFIG_RUNTIME_CPU_DETECT
44         mbd->rtcd = xd->rtcd;
45 #endif
46
47
48         mbd->subpixel_predict        = xd->subpixel_predict;
49         mbd->subpixel_predict8x4     = xd->subpixel_predict8x4;
50         mbd->subpixel_predict8x8     = xd->subpixel_predict8x8;
51         mbd->subpixel_predict16x16   = xd->subpixel_predict16x16;
52         mbd->gf_active_ptr            = xd->gf_active_ptr;
53
54         mbd->mode_info        = pc->mi - 1;
55         mbd->mode_info_context = pc->mi   + pc->mode_info_stride * (i + 1);
56         mbd->mode_info_stride  = pc->mode_info_stride;
57
58         mbd->frame_type = pc->frame_type;
59         mbd->frames_since_golden      = pc->frames_since_golden;
60         mbd->frames_till_alt_ref_frame  = pc->frames_till_alt_ref_frame;
61
62         mbd->pre = pc->last_frame;
63         mbd->dst = pc->new_frame;
64
65
66
67
68         vp8_setup_block_dptrs(mbd);
69         vp8_build_block_doffsets(mbd);
70         mbd->segmentation_enabled    = xd->segmentation_enabled;
71         mbd->mb_segement_abs_delta     = xd->mb_segement_abs_delta;
72         vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
73
74         mbd->mbmi.mode = DC_PRED;
75         mbd->mbmi.uv_mode = DC_PRED;
76
77         mbd->current_bc = &pbi->bc2;
78
79         for (j = 0; j < 25; j++)
80         {
81             mbd->block[j].dequant = xd->block[j].dequant;
82         }
83     }
84
85 #else
86     (void) pbi;
87     (void) xd;
88     (void) mbrd;
89     (void) count;
90 #endif
91 }
92
93
94 THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
95 {
96 #if CONFIG_MULTITHREAD
97     int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
98     VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
99     MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
100     ENTROPY_CONTEXT mb_row_left_context[4][4];
101
102     while (1)
103     {
104         if (pbi->b_multithreaded_rd == 0)
105             break;
106
107         //if(WaitForSingleObject(pbi->h_event_mbrdecoding[ithread], INFINITE) == WAIT_OBJECT_0)
108         if (sem_wait(&pbi->h_event_mbrdecoding[ithread]) == 0)
109         {
110             if (pbi->b_multithreaded_rd == 0)
111                 break;
112             else
113             {
114                 VP8_COMMON *pc = &pbi->common;
115                 int mb_row       = mbrd->mb_row;
116                 MACROBLOCKD *xd = &mbrd->mbd;
117
118                 //printf("ithread:%d mb_row %d\n", ithread, mb_row);
119                 int i;
120                 int recon_yoffset, recon_uvoffset;
121                 int mb_col;
122                 int recon_y_stride = pc->last_frame.y_stride;
123                 int recon_uv_stride = pc->last_frame.uv_stride;
124
125                 volatile int *last_row_current_mb_col;
126
127                 if (ithread > 0)
128                     last_row_current_mb_col = &pbi->mb_row_di[ithread-1].current_mb_col;
129                 else
130                     last_row_current_mb_col = &pbi->current_mb_col_main;
131
132                 recon_yoffset = mb_row * recon_y_stride * 16;
133                 recon_uvoffset = mb_row * recon_uv_stride * 8;
134                 // reset above block coeffs
135
136                 xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT];
137                 xd->above_context[UCONTEXT ] = pc->above_context[UCONTEXT];
138                 xd->above_context[VCONTEXT ] = pc->above_context[VCONTEXT];
139                 xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT];
140                 xd->left_context = mb_row_left_context;
141                 vpx_memset(mb_row_left_context, 0, sizeof(mb_row_left_context));
142                 xd->up_available = (mb_row != 0);
143
144                 xd->mb_to_top_edge = -((mb_row * 16)) << 3;
145                 xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
146
147                 for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
148                 {
149
150                     while (mb_col > (*last_row_current_mb_col - 1) && *last_row_current_mb_col != pc->mb_cols - 1)
151                     {
152                         x86_pause_hint();
153                         thread_sleep(0);
154                     }
155
156                     // Take a copy of the mode and Mv information for this macroblock into the xd->mbmi
157                     vpx_memcpy(&xd->mbmi, &xd->mode_info_context->mbmi, 32); //sizeof(MB_MODE_INFO) );
158
159                     if (xd->mbmi.mode == SPLITMV || xd->mbmi.mode == B_PRED)
160                     {
161                         for (i = 0; i < 16; i++)
162                         {
163                             BLOCKD *d = &xd->block[i];
164                             vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
165                         }
166                     }
167
168                     // Distance of Mb to the various image edges.
169                     // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
170                     xd->mb_to_left_edge = -((mb_col * 16) << 3);
171                     xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
172
173                     xd->dst.y_buffer = pc->new_frame.y_buffer + recon_yoffset;
174                     xd->dst.u_buffer = pc->new_frame.u_buffer + recon_uvoffset;
175                     xd->dst.v_buffer = pc->new_frame.v_buffer + recon_uvoffset;
176
177                     xd->left_available = (mb_col != 0);
178
179                     // Select the appropriate reference frame for this MB
180                     if (xd->mbmi.ref_frame == LAST_FRAME)
181                     {
182                         xd->pre.y_buffer = pc->last_frame.y_buffer + recon_yoffset;
183                         xd->pre.u_buffer = pc->last_frame.u_buffer + recon_uvoffset;
184                         xd->pre.v_buffer = pc->last_frame.v_buffer + recon_uvoffset;
185                     }
186                     else if (xd->mbmi.ref_frame == GOLDEN_FRAME)
187                     {
188                         // Golden frame reconstruction buffer
189                         xd->pre.y_buffer = pc->golden_frame.y_buffer + recon_yoffset;
190                         xd->pre.u_buffer = pc->golden_frame.u_buffer + recon_uvoffset;
191                         xd->pre.v_buffer = pc->golden_frame.v_buffer + recon_uvoffset;
192                     }
193                     else
194                     {
195                         // Alternate reference frame reconstruction buffer
196                         xd->pre.y_buffer = pc->alt_ref_frame.y_buffer + recon_yoffset;
197                         xd->pre.u_buffer = pc->alt_ref_frame.u_buffer + recon_uvoffset;
198                         xd->pre.v_buffer = pc->alt_ref_frame.v_buffer + recon_uvoffset;
199                     }
200
201                     vp8_build_uvmvs(xd, pc->full_pixel);
202
203                     vp8dx_bool_decoder_fill(xd->current_bc);
204                     vp8_decode_macroblock(pbi, xd);
205
206
207                     recon_yoffset += 16;
208                     recon_uvoffset += 8;
209
210                     ++xd->mode_info_context;  /* next mb */
211
212                     xd->gf_active_ptr++;      // GF useage flag for next MB
213
214                     xd->above_context[Y1CONTEXT] += 4;
215                     xd->above_context[UCONTEXT ] += 2;
216                     xd->above_context[VCONTEXT ] += 2;
217                     xd->above_context[Y2CONTEXT] ++;
218                     pbi->mb_row_di[ithread].current_mb_col = mb_col;
219
220                 }
221
222                 // adjust to the next row of mbs
223                 vp8_extend_mb_row(
224                     &pc->new_frame,
225                     xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
226                 );
227
228                 ++xd->mode_info_context;      /* skip prediction column */
229
230                 // since we have multithread
231                 xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
232
233                 //memcpy(&pbi->lpfmb, &pbi->mb, sizeof(pbi->mb));
234                 if ((mb_row & 1) == 1)
235                 {
236                     pbi->last_mb_row_decoded = mb_row;
237                     //printf("S%d", pbi->last_mb_row_decoded);
238                 }
239
240                 if (ithread == (pbi->decoding_thread_count - 1) || mb_row == pc->mb_rows - 1)
241                 {
242                     //SetEvent(pbi->h_event_main);
243                     sem_post(&pbi->h_event_main);
244
245                 }
246             }
247         }
248     }
249
250 #else
251     (void) p_data;
252 #endif
253
254     return 0 ;
255 }
256
257 THREAD_FUNCTION vp8_thread_loop_filter(void *p_data)
258 {
259 #if CONFIG_MULTITHREAD
260     VP8D_COMP *pbi = (VP8D_COMP *)p_data;
261
262     while (1)
263     {
264         if (pbi->b_multithreaded_lf == 0)
265             break;
266
267         //printf("before waiting for start_lpf\n");
268
269         //if(WaitForSingleObject(pbi->h_event_start_lpf, INFINITE) == WAIT_OBJECT_0)
270         if (sem_wait(&pbi->h_event_start_lpf) == 0)
271         {
272             if (pbi->b_multithreaded_lf == 0) // we're shutting down
273                 break;
274             else
275             {
276
277                 VP8_COMMON *cm  = &pbi->common;
278                 MACROBLOCKD *mbd = &pbi->lpfmb;
279                 int default_filt_lvl = pbi->common.filter_level;
280
281                 YV12_BUFFER_CONFIG *post = &cm->new_frame;
282                 loop_filter_info *lfi = cm->lf_info;
283
284                 int mb_row;
285                 int mb_col;
286
287
288                 int baseline_filter_level[MAX_MB_SEGMENTS];
289                 int filter_level;
290                 int alt_flt_enabled = mbd->segmentation_enabled;
291
292                 int i;
293                 unsigned char *y_ptr, *u_ptr, *v_ptr;
294
295                 volatile int *last_mb_row_decoded = &pbi->last_mb_row_decoded;
296
297                 //MODE_INFO * this_mb_mode_info = cm->mi;
298                 mbd->mode_info_context = cm->mi;          // Point at base of Mb MODE_INFO list
299
300                 // Note the baseline filter values for each segment
301                 if (alt_flt_enabled)
302                 {
303                     for (i = 0; i < MAX_MB_SEGMENTS; i++)
304                     {
305                         if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
306                             baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
307                         else
308                         {
309                             baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
310                             baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  // Clamp to valid range
311                         }
312                     }
313                 }
314                 else
315                 {
316                     for (i = 0; i < MAX_MB_SEGMENTS; i++)
317                         baseline_filter_level[i] = default_filt_lvl;
318                 }
319
320                 // Initialize the loop filter for this frame.
321                 vp8_init_loop_filter(cm);
322
323                 // Set up the buffer pointers
324                 y_ptr = post->y_buffer;
325                 u_ptr = post->u_buffer;
326                 v_ptr = post->v_buffer;
327
328                 // vp8_filter each macro block
329                 for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
330                 {
331
332                     while (mb_row >= *last_mb_row_decoded)
333                     {
334                         x86_pause_hint();
335                         thread_sleep(0);
336                     }
337
338                     //printf("R%d", mb_row);
339                     for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
340                     {
341                         int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
342
343                         filter_level = baseline_filter_level[Segment];
344
345                         // Apply any context driven MB level adjustment
346                         vp8_adjust_mb_lf_value(mbd, &filter_level);
347
348                         if (filter_level)
349                         {
350                             if (mb_col > 0)
351                                 cm->lf_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
352
353                             if (mbd->mode_info_context->mbmi.dc_diff > 0)
354                                 cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
355
356                             // don't apply across umv border
357                             if (mb_row > 0)
358                                 cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
359
360                             if (mbd->mode_info_context->mbmi.dc_diff > 0)
361                                 cm->lf_bh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
362                         }
363
364                         y_ptr += 16;
365                         u_ptr += 8;
366                         v_ptr += 8;
367
368                         mbd->mode_info_context++;     // step to next MB
369
370                     }
371
372                     y_ptr += post->y_stride  * 16 - post->y_width;
373                     u_ptr += post->uv_stride *  8 - post->uv_width;
374                     v_ptr += post->uv_stride *  8 - post->uv_width;
375
376                     mbd->mode_info_context++;         // Skip border mb
377                 }
378
379                 //printf("R%d\n", mb_row);
380                 // When done, signal main thread that ME is finished
381                 //SetEvent(pbi->h_event_lpf);
382                 sem_post(&pbi->h_event_lpf);
383             }
384
385         }
386     }
387
388 #else
389     (void) p_data;
390 #endif
391     return 0;
392 }
393
394 void vp8_decoder_create_threads(VP8D_COMP *pbi)
395 {
396 #if CONFIG_MULTITHREAD
397     int core_count = 0;
398     int ithread;
399
400     pbi->b_multithreaded_rd = 0;
401     pbi->b_multithreaded_lf = 0;
402     pbi->allocated_decoding_thread_count = 0;
403     core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads; //vp8_get_proc_core_count();
404     if (core_count > 1)
405     {
406         sem_init(&pbi->h_event_lpf, 0, 0);
407         sem_init(&pbi->h_event_start_lpf, 0, 0);
408         pbi->b_multithreaded_lf = 1;
409         pthread_create(&pbi->h_thread_lpf, 0, vp8_thread_loop_filter, (pbi));
410     }
411
412     if (core_count > 1)
413     {
414         pbi->b_multithreaded_rd = 1;
415         pbi->decoding_thread_count = core_count - 1;
416
417         CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
418         CHECK_MEM_ERROR(pbi->h_event_mbrdecoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
419         CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count));
420         vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count);
421         CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count));
422
423         for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)
424         {
425             sem_init(&pbi->h_event_mbrdecoding[ithread], 0, 0);
426
427             pbi->de_thread_data[ithread].ithread  = ithread;
428             pbi->de_thread_data[ithread].ptr1     = (void *)pbi;
429             pbi->de_thread_data[ithread].ptr2     = (void *) &pbi->mb_row_di[ithread];
430
431             pthread_create(&pbi->h_decoding_thread[ithread], 0, vp8_thread_decoding_proc, (&pbi->de_thread_data[ithread]));
432
433         }
434
435         sem_init(&pbi->h_event_main, 0, 0);
436         pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
437     }
438
439 #else
440     (void) pbi;
441 #endif
442 }
443
444 void vp8_decoder_remove_threads(VP8D_COMP *pbi)
445 {
446 #if CONFIG_MULTITHREAD
447
448     if (pbi->b_multithreaded_lf)
449     {
450         pbi->b_multithreaded_lf = 0;
451         sem_post(&pbi->h_event_start_lpf);
452         pthread_join(pbi->h_thread_lpf, 0);
453         sem_destroy(&pbi->h_event_start_lpf);
454     }
455
456     //shutdown MB Decoding thread;
457     if (pbi->b_multithreaded_rd)
458     {
459         pbi->b_multithreaded_rd = 0;
460         // allow all threads to exit
461         {
462             int i;
463
464             for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
465             {
466
467                 sem_post(&pbi->h_event_mbrdecoding[i]);
468                 pthread_join(pbi->h_decoding_thread[i], NULL);
469             }
470         }
471         {
472
473             int i;
474             for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
475             {
476                 sem_destroy(&pbi->h_event_mbrdecoding[i]);
477             }
478
479
480         }
481
482         sem_destroy(&pbi->h_event_main);
483
484         if (pbi->h_decoding_thread)
485         {
486             vpx_free(pbi->h_decoding_thread);
487             pbi->h_decoding_thread = NULL;
488         }
489
490         if (pbi->h_event_mbrdecoding)
491         {
492             vpx_free(pbi->h_event_mbrdecoding);
493             pbi->h_event_mbrdecoding = NULL;
494         }
495
496         if (pbi->mb_row_di)
497         {
498             vpx_free(pbi->mb_row_di);
499             pbi->mb_row_di = NULL ;
500         }
501
502         if (pbi->de_thread_data)
503         {
504             vpx_free(pbi->de_thread_data);
505             pbi->de_thread_data = NULL;
506         }
507     }
508
509 #else
510     (void) pbi;
511 #endif
512 }
513
514
515 void vp8_start_lfthread(VP8D_COMP *pbi)
516 {
517 #if CONFIG_MULTITHREAD
518     memcpy(&pbi->lpfmb, &pbi->mb, sizeof(pbi->mb));
519     pbi->last_mb_row_decoded = 0;
520     sem_post(&pbi->h_event_start_lpf);
521 #else
522     (void) pbi;
523 #endif
524 }
525
526 void vp8_stop_lfthread(VP8D_COMP *pbi)
527 {
528 #if CONFIG_MULTITHREAD
529     struct vpx_usec_timer timer;
530
531     vpx_usec_timer_start(&timer);
532
533     sem_wait(&pbi->h_event_lpf);
534
535     vpx_usec_timer_mark(&timer);
536     pbi->time_loop_filtering += vpx_usec_timer_elapsed(&timer);
537 #else
538     (void) pbi;
539 #endif
540 }
541
542
543 void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
544                           MACROBLOCKD *xd)
545 {
546 #if CONFIG_MULTITHREAD
547     int mb_row;
548     VP8_COMMON *pc = &pbi->common;
549
550     int ibc = 0;
551     int num_part = 1 << pbi->common.multi_token_partition;
552
553     vp8_setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
554
555     for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
556     {
557         int i;
558         pbi->current_mb_col_main = -1;
559
560         xd->current_bc = &pbi->mbc[ibc];
561         ibc++ ;
562
563         if (ibc == num_part)
564             ibc = 0;
565
566         for (i = 0; i < pbi->decoding_thread_count; i++)
567         {
568             if ((mb_row + i + 1) >= pc->mb_rows)
569                 break;
570
571             pbi->mb_row_di[i].mb_row = mb_row + i + 1;
572             pbi->mb_row_di[i].mbd.current_bc =  &pbi->mbc[ibc];
573             ibc++;
574
575             if (ibc == num_part)
576                 ibc = 0;
577
578             pbi->mb_row_di[i].current_mb_col = -1;
579             sem_post(&pbi->h_event_mbrdecoding[i]);
580         }
581
582         vp8_decode_mb_row(pbi, pc, mb_row, xd);
583
584         xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
585
586         if (mb_row < pc->mb_rows - 1)
587         {
588             sem_wait(&pbi->h_event_main);
589         }
590     }
591
592     pbi->last_mb_row_decoded = mb_row;
593 #else
594     (void) pbi;
595     (void) xd;
596 #endif
597 }