From 58e0159c80789da2257463624f56935988e1e7c1 Mon Sep 17 00:00:00 2001 From: Yunqing Wang Date: Tue, 24 Feb 2015 10:37:05 -0800 Subject: [PATCH] Fix ssse3 quantize_fp functions while skip=1 In the ssse3 functions, the DEFINE_ARGS macro hard-codes qcoeff and dqcoeff to r3 and r4. If skip is 1, qcoeff and dqcoeff need to be loaded from the stack, which doesn't work because of the above definitions. Currently, the skip=1 case is not used in the encoder. This patch fixes the issue so that it can be turned on later. Change-Id: I998d696b1a7a85dca2b3bcee790b21c21e039147 --- vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm | 67 ++++++++++++++------------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm index 72e01d6..c35eb36 100644 --- a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm +++ b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm @@ -15,6 +15,7 @@ pw_1: times 8 dw 1 SECTION .text +; TODO(yunqingwang)fix quantize_b code for skip=1 case. 
%macro QUANTIZE_FN 2 cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ shift, qcoeff, dqcoeff, dequant, \ @@ -244,11 +245,11 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ psllw m2, 1 %endif pxor m5, m5 ; m5 = dedicated zero - DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, eob + lea coeffq, [ coeffq+ncoeffq*2] - lea iscanq, [ iscanq+ncoeffq*2] - lea qcoeffq, [ qcoeffq+ncoeffq*2] - lea dqcoeffq, [dqcoeffq+ncoeffq*2] + lea r5q, [ r5q+ncoeffq*2] + lea r3q, [ r3q+ncoeffq*2] + lea r4q, [r4q+ncoeffq*2] neg ncoeffq ; get DC and first 15 AC coeffs @@ -266,15 +267,15 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ pmulhw m13, m11, m2 ; m13 = m11*q>>16 psignw m8, m9 ; m8 = reinsert sign psignw m13, m10 ; m13 = reinsert sign - mova [qcoeffq+ncoeffq*2+ 0], m8 - mova [qcoeffq+ncoeffq*2+16], m13 + mova [r3q+ncoeffq*2+ 0], m8 + mova [r3q+ncoeffq*2+16], m13 %ifidn %1, fp_32x32 pabsw m8, m8 pabsw m13, m13 %endif - pmullw m8, m3 ; dqc[i] = qc[i] * q + pmullw m8, m3 ; r4[i] = r3[i] * q punpckhqdq m3, m3 - pmullw m13, m3 ; dqc[i] = qc[i] * q + pmullw m13, m3 ; r4[i] = r3[i] * q %ifidn %1, fp_32x32 psrlw m8, 1 psrlw m13, 1 @@ -282,12 +283,12 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ psignw m13, m10 psrlw m0, m3, 2 %endif - mova [dqcoeffq+ncoeffq*2+ 0], m8 - mova [dqcoeffq+ncoeffq*2+16], m13 + mova [r4q+ncoeffq*2+ 0], m8 + mova [r4q+ncoeffq*2+16], m13 pcmpeqw m8, m5 ; m8 = c[i] == 0 pcmpeqw m13, m5 ; m13 = c[i] == 0 - mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i] - mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i] + mova m6, [ r5q+ncoeffq*2+ 0] ; m6 = scan[i] + mova m11, [ r5q+ncoeffq*2+16] ; m11 = scan[i] psubw m6, m7 ; m6 = scan[i] + 1 psubw m11, m7 ; m11 = scan[i] + 1 pandn m8, m6 ; m8 = max(eob) @@ -318,26 +319,26 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ pmulhw m13, m11, m2 ; m13 = m11*q>>16 psignw m14, 
m9 ; m14 = reinsert sign psignw m13, m10 ; m13 = reinsert sign - mova [qcoeffq+ncoeffq*2+ 0], m14 - mova [qcoeffq+ncoeffq*2+16], m13 + mova [r3q+ncoeffq*2+ 0], m14 + mova [r3q+ncoeffq*2+16], m13 %ifidn %1, fp_32x32 pabsw m14, m14 pabsw m13, m13 %endif - pmullw m14, m3 ; dqc[i] = qc[i] * q - pmullw m13, m3 ; dqc[i] = qc[i] * q + pmullw m14, m3 ; r4[i] = r3[i] * q + pmullw m13, m3 ; r4[i] = r3[i] * q %ifidn %1, fp_32x32 psrlw m14, 1 psrlw m13, 1 psignw m14, m9 psignw m13, m10 %endif - mova [dqcoeffq+ncoeffq*2+ 0], m14 - mova [dqcoeffq+ncoeffq*2+16], m13 + mova [r4q+ncoeffq*2+ 0], m14 + mova [r4q+ncoeffq*2+16], m13 pcmpeqw m14, m5 ; m14 = c[i] == 0 pcmpeqw m13, m5 ; m13 = c[i] == 0 - mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i] - mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i] + mova m6, [ r5q+ncoeffq*2+ 0] ; m6 = scan[i] + mova m11, [ r5q+ncoeffq*2+16] ; m11 = scan[i] psubw m6, m7 ; m6 = scan[i] + 1 psubw m11, m7 ; m11 = scan[i] + 1 pandn m14, m6 ; m14 = max(eob) @@ -350,10 +351,10 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ %ifidn %1, fp_32x32 jmp .accumulate_eob .skip_iter: - mova [qcoeffq+ncoeffq*2+ 0], m5 - mova [qcoeffq+ncoeffq*2+16], m5 - mova [dqcoeffq+ncoeffq*2+ 0], m5 - mova [dqcoeffq+ncoeffq*2+16], m5 + mova [r3q+ncoeffq*2+ 0], m5 + mova [r3q+ncoeffq*2+16], m5 + mova [r4q+ncoeffq*2+ 0], m5 + mova [r4q+ncoeffq*2+16], m5 add ncoeffq, mmsize jl .ac_only_loop %endif @@ -368,7 +369,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ pshuflw m7, m8, 0x1 pmaxsw m8, m7 pextrw r6, m8, 0 - mov [r2], r6 + mov [r2], r6 RET ; skip-block, i.e. 
just write all zeroes @@ -377,19 +378,19 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ movifnidn ncoeffq, ncoeffmp mov r2, qcoeffmp mov r3, eobmp - DEFINE_ARGS dqcoeff, ncoeff, qcoeff, eob - lea dqcoeffq, [dqcoeffq+ncoeffq*2] - lea qcoeffq, [ qcoeffq+ncoeffq*2] + + lea r0q, [r0q+ncoeffq*2] + lea r2q, [r2q+ncoeffq*2] neg ncoeffq pxor m7, m7 .blank_loop: - mova [dqcoeffq+ncoeffq*2+ 0], m7 - mova [dqcoeffq+ncoeffq*2+16], m7 - mova [qcoeffq+ncoeffq*2+ 0], m7 - mova [qcoeffq+ncoeffq*2+16], m7 + mova [r0q+ncoeffq*2+ 0], m7 + mova [r0q+ncoeffq*2+16], m7 + mova [r2q+ncoeffq*2+ 0], m7 + mova [r2q+ncoeffq*2+16], m7 add ncoeffq, mmsize jl .blank_loop - mov word [eobq], 0 + mov word [r3q], 0 RET %endmacro -- 2.7.4