);
return coeff_count;
}
+
+static int decode_significance_8x8_x86(CABACContext *c, uint8_t *significant_coeff_ctx_base, int *index, uint8_t *sig_off){
+ int minusindex= 4-(int)index;
+ int coeff_count;
+ int last=0;
+ asm volatile(
+ "movl "RANGE "(%3), %%esi \n\t"
+ "movl "LOW "(%3), %%ebx \n\t"
+
+ "mov %1, %%edi \n\t"
+ "2: \n\t"
+
+ "mov %6, %%eax \n\t"
+ "movzbl (%%eax, %%edi), %%edi \n\t"
+ "add %5, %%edi \n\t"
+
+ BRANCHLESS_GET_CABAC("%%edx", "%3", "(%%edi)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
+
+ "mov %1, %%edi \n\t"
+ "test $1, %%edx \n\t"
+ " jz 3f \n\t"
+
+ "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%%edi), %%edi\n\t"
+ "add %5, %%edi \n\t"
+
+ BRANCHLESS_GET_CABAC("%%edx", "%3", "15(%%edi)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
+
+ "movl %2, %%eax \n\t"
+ "mov %1, %%edi \n\t"
+ "movl %%edi, (%%eax) \n\t"
+
+ "test $1, %%edx \n\t"
+ " jnz 4f \n\t"
+
+ "addl $4, %%eax \n\t"
+ "movl %%eax, %2 \n\t"
+
+ "3: \n\t"
+ "addl $1, %%edi \n\t"
+ "mov %%edi, %1 \n\t"
+ "cmpl $63, %%edi \n\t"
+ " jb 2b \n\t"
+ "movl %2, %%eax \n\t"
+ "movl %%edi, (%%eax) \n\t"
+ "4: \n\t"
+ "addl %4, %%eax \n\t"
+ "shr $2, %%eax \n\t"
+
+ "movl %%esi, "RANGE "(%3) \n\t"
+ "movl %%ebx, "LOW "(%3) \n\t"
+ :"=&a"(coeff_count),"+m"(last), "+m"(index)\
+ :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off)\
+ : "%ecx", "%ebx", "%edx", "%esi", "%edi", "memory"\
+ );
+ return coeff_count;
+}
#endif
/**
return ctx + 4 * cat;
}
+static const __attribute((used)) uint8_t last_coeff_flag_offset_8x8[63] = {
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
+};
+
static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
static const int significant_coeff_flag_offset[2][6] = {
9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
};
- static const uint8_t last_coeff_flag_offset_8x8[63] = {
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
- 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
- };
int index[64];
index[coeff_count++] = last;\
}
const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
- DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
- } else {
#ifdef ARCH_X86
+ coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
+ } else {
coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
#else
+ DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
+ } else {
DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
#endif
}