From 789ae447f8d00bb69f5e58ad314b9686c53f4f6d Mon Sep 17 00:00:00 2001
From: Yunqing Wang
Date: Wed, 4 Feb 2015 12:02:06 -0800
Subject: [PATCH] Fix high bit depth assembly function bugs

The high bit depth build failed when building for a 32-bit target. The
bugs were in the vp9_highbd_subpel_variance.asm and
vp9_highbd_sad4d_sse2.asm functions. This patch fixes the bugs and
makes the 32-bit build work.

Change-Id: Idc8e5e1b7965bb70d4afba140c6583c5d9666b75
---
 vp9/encoder/x86/vp9_highbd_sad4d_sse2.asm      | 19 ++++----
 vp9/encoder/x86/vp9_highbd_subpel_variance.asm | 66 +++++++++++++++-----------
 2 files changed, 50 insertions(+), 35 deletions(-)

diff --git a/vp9/encoder/x86/vp9_highbd_sad4d_sse2.asm b/vp9/encoder/x86/vp9_highbd_sad4d_sse2.asm
index 986efb1..f79a59f 100644
--- a/vp9/encoder/x86/vp9_highbd_sad4d_sse2.asm
+++ b/vp9/encoder/x86/vp9_highbd_sad4d_sse2.asm
@@ -215,13 +215,20 @@ SECTION .text
 ; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16 or 8x8
 %macro HIGH_SADNXN4D 2
 %if UNIX64
-cglobal highbd_sad%1x%2x4d, 5, 9, 8, src, src_stride, ref1, ref_stride, \
-                            res, ref2, ref3, ref4, one
+cglobal highbd_sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \
+                            res, ref2, ref3, ref4
 %else
-cglobal highbd_sad%1x%2x4d, 4, 8, 8, src, src_stride, ref1, ref_stride, \
-                            ref2, ref3, ref4, one
+cglobal highbd_sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
+                            ref2, ref3, ref4
 %endif
 
+; set m1
+  push srcq
+  mov srcd, 0x00010001
+  movd m1, srcd
+  pshufd m1, m1, 0x0
+  pop srcq
+
   movsxdifnidn src_strideq, src_strided
   movsxdifnidn ref_strideq, ref_strided
   mov ref2q, [ref1q+gprsize*1]
@@ -236,10 +243,6 @@ cglobal highbd_sad%1x%2x4d, 4, 8, 8, src, src_stride, ref1, ref_stride, \
   shl ref4q, 1
   shl ref1q, 1
 
-  mov oned, 0x00010001
-  movd m1, oned
-  pshufd m1, m1, 0x0
-
   HIGH_PROCESS_%1x2x4 1, 0, 0, src_strideq, ref_strideq, 1
 %rep (%2-4)/2
   HIGH_PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 1
diff --git a/vp9/encoder/x86/vp9_highbd_subpel_variance.asm b/vp9/encoder/x86/vp9_highbd_subpel_variance.asm
index aebe63b..987729f 100644
--- a/vp9/encoder/x86/vp9_highbd_subpel_variance.asm
+++ b/vp9/encoder/x86/vp9_highbd_subpel_variance.asm
@@ -199,6 +199,9 @@ SECTION .text
 %if %1 < 16
   sar h, 1
 %endif
+%if %2 == 1 ; avg
+  shl sec_str, 1
+%endif
 
   ; FIXME(rbultje) replace by jumptable?
   test x_offsetd, x_offsetd
@@ -223,7 +226,7 @@ SECTION .text
   lea srcq, [srcq + src_strideq*2]
   lea dstq, [dstq + dst_strideq*2]
 %if %2 == 1 ; avg
-  lea secq, [secq + sec_str*2]
+  add secq, sec_str
 %endif
 %else ; %1 < 16
   movu m0, [srcq]
   movu m2, [srcq + src_strideq*2]
   mova m1, [dstq]
   mova m3, [dstq + dst_strideq*2]
 %if %2 == 1 ; avg
   pavgw m0, [secq]
-  pavgw m2, [secq + sec_str*2]
+  add secq, sec_str
+  pavgw m2, [secq]
 %endif
   SUM_SSE m0, m1, m2, m3, m6, m7
 
   lea srcq, [srcq + src_strideq*4]
   lea dstq, [dstq + dst_strideq*4]
 %if %2 == 1 ; avg
-  lea secq, [secq + sec_str*4]
+  add secq, sec_str
 %endif
 %endif
   dec h
@@ -270,7 +274,7 @@ SECTION .text
   lea srcq, [srcq + src_strideq*2]
   lea dstq, [dstq + dst_strideq*2]
 %if %2 == 1 ; avg
-  lea secq, [secq + sec_str*2]
+  add secq, sec_str
 %endif
 %else ; %1 < 16
   movu m0, [srcq]
   movu m1, [srcq + src_strideq*2]
   mova m2, [dstq]
   mova m3, [dstq + dst_strideq*2]
   pavgw m0, m4
   pavgw m1, m5
 %if %2 == 1 ; avg
   pavgw m0, [secq]
-  pavgw m1, [secq+sec_str*2]
+  add secq, sec_str
+  pavgw m1, [secq]
 %endif
   SUM_SSE m0, m2, m1, m3, m6, m7
 
   lea srcq, [srcq + src_strideq*4]
   lea dstq, [dstq + dst_strideq*4]
 %if %2 == 1 ; avg
-  lea secq, [secq + sec_str*4]
+  add secq, sec_str
 %endif
 %endif
   dec h
@@ -358,7 +363,7 @@ SECTION .text
   lea srcq, [srcq + src_strideq*2]
   lea dstq, [dstq + dst_strideq*2]
 %if %2 == 1 ; avg
-  lea secq, [secq + sec_str*2]
+  add secq, sec_str
 %endif
 %else ; %1 < 16
   movu m0, [srcq]
@@ -379,14 +384,15 @@ SECTION .text
   psrlw m0, 4
 %if %2 == 1 ; avg
   pavgw m0, [secq]
-  pavgw m1, [secq+sec_str*2]
+  add secq, sec_str
+  pavgw m1, [secq]
 %endif
   SUM_SSE m0, m2, m1, m3, m6, m7
 
   lea srcq, [srcq + src_strideq*4]
   lea dstq, [dstq + dst_strideq*4]
 %if %2 == 1 ; avg
-  lea secq, [secq + sec_str*4]
+  add secq, sec_str
 %endif
 %endif
   dec h
@@ -423,7 +429,7 @@ SECTION .text
   lea srcq, [srcq + src_strideq*2]
   lea dstq, [dstq + dst_strideq*2]
 %if %2 == 1 ; avg
-  lea secq, [secq + sec_str*2]
+  add secq, sec_str
 %endif
 %else ; %1 < 16
   movu m0, [srcq]
@@ -436,14 +442,15 @@ SECTION .text
   pavgw m1, m5
 %if %2 == 1 ; avg
   pavgw m0, [secq]
-  pavgw m1, [secq+sec_str*2]
+  add secq, sec_str
+  pavgw m1, [secq]
 %endif
   SUM_SSE m0, m2, m1, m3, m6, m7
 
   lea srcq, [srcq + src_strideq*4]
   lea dstq, [dstq + dst_strideq*4]
 %if %2 == 1 ; avg
-  lea secq, [secq + sec_str*4]
+  add secq, sec_str
 %endif
 %endif
   dec h
@@ -485,7 +492,7 @@ SECTION .text
   lea srcq, [srcq + src_strideq*2]
   lea dstq, [dstq + dst_strideq*2]
 %if %2 == 1 ; avg
-  lea secq, [secq + sec_str*2]
+  add secq, sec_str
 %endif
 %else ; %1 < 16
   movu m0, [srcq]
@@ -505,7 +512,8 @@ SECTION .text
   mova m5, [dstq + dst_strideq*2]
 %if %2 == 1 ; avg
   pavgw m0, [secq]
-  pavgw m2, [secq+sec_str*2]
+  add secq, sec_str
+  pavgw m2, [secq]
 %endif
   SUM_SSE m0, m4, m2, m5, m6, m7
   mova m0, m3
@@ -513,7 +521,7 @@ SECTION .text
   lea srcq, [srcq + src_strideq*4]
   lea dstq, [dstq + dst_strideq*4]
 %if %2 == 1 ; avg
-  lea secq, [secq + sec_str*4]
+  add secq, sec_str
 %endif
 %endif
   dec h
@@ -590,7 +598,7 @@ SECTION .text
   lea srcq, [srcq + src_strideq*2]
   lea dstq, [dstq + dst_strideq*2]
 %if %2 == 1 ; avg
-  lea secq, [secq + sec_str*2]
+  add secq, sec_str
 %endif
 %else ; %1 < 16
   movu m0, [srcq]
@@ -620,7 +628,8 @@ SECTION .text
   mova m3, [dstq+dst_strideq*2]
 %if %2 == 1 ; avg
   pavgw m0, [secq]
-  pavgw m4, [secq+sec_str*2]
+  add secq, sec_str
+  pavgw m4, [secq]
 %endif
   SUM_SSE m0, m2, m4, m3, m6, m7
   mova m0, m5
@@ -628,7 +637,7 @@ SECTION .text
   lea srcq, [srcq + src_strideq*4]
   lea dstq, [dstq + dst_strideq*4]
 %if %2 == 1 ; avg
-  lea secq, [secq + sec_str*4]
+  add secq, sec_str
 %endif
 %endif
   dec h
@@ -698,7 +707,7 @@ SECTION .text
   lea srcq, [srcq+src_strideq*2]
   lea dstq, [dstq+dst_strideq*2]
 %if %2 == 1 ; avg
-  lea secq, [secq + sec_str*2]
+  add secq, sec_str
 %endif
 %else ; %1 < 16
   movu m0, [srcq]
@@ -719,14 +728,15 @@ SECTION .text
   psrlw m0, 4
 %if %2 == 1 ; avg
   pavgw m0, [secq]
-  pavgw m1, [secq+sec_str*2]
+  add secq, sec_str
+  pavgw m1, [secq]
 %endif
   SUM_SSE m0, m4, m1, m5, m6, m7
 
   lea srcq, [srcq+src_strideq*4]
   lea dstq, [dstq+dst_strideq*4]
 %if %2 == 1 ; avg
-  lea secq, [secq + sec_str*4]
+  add secq, sec_str
 %endif
 %endif
   dec h
@@ -815,7 +825,7 @@ SECTION .text
   lea srcq, [srcq+src_strideq*2]
   lea dstq, [dstq+dst_strideq*2]
 %if %2 == 1 ; avg
-  lea secq, [secq + sec_str*2]
+  add secq, sec_str
 %endif
 %else ; %1 < 16
   movu m0, [srcq]
@@ -847,7 +857,8 @@ SECTION .text
   pavgw m2, m3
 %if %2 == 1 ; avg
   pavgw m0, [secq]
-  pavgw m2, [secq+sec_str*2]
+  add secq, sec_str
+  pavgw m2, [secq]
 %endif
   SUM_SSE m0, m4, m2, m5, m6, m7
   mova m0, m3
@@ -855,7 +866,7 @@ SECTION .text
   lea srcq, [srcq+src_strideq*4]
   lea dstq, [dstq+dst_strideq*4]
 %if %2 == 1 ; avg
-  lea secq, [secq + sec_str*4]
+  add secq, sec_str
 %endif
 %endif
   dec h
@@ -969,7 +980,7 @@ SECTION .text
   INC_SRC_BY_SRC_STRIDE
   lea dstq, [dstq + dst_strideq * 2]
 %if %2 == 1 ; avg
-  lea secq, [secq + sec_str*2]
+  add secq, sec_str
 %endif
 %else ; %1 < 16
   movu m0, [srcq]
@@ -1013,7 +1024,8 @@ SECTION .text
   mova m3, [dstq+dst_strideq*2]
 %if %2 == 1 ; avg
   pavgw m0, [secq]
-  pavgw m4, [secq+sec_str*2]
+  add secq, sec_str
+  pavgw m4, [secq]
 %endif
   SUM_SSE m0, m2, m4, m3, m6, m7
   mova m0, m5
@@ -1021,7 +1033,7 @@ SECTION .text
   INC_SRC_BY_SRC_2STRIDE
   lea dstq, [dstq + dst_strideq * 4]
 %if %2 == 1 ; avg
-  lea secq, [secq + sec_str*4]
+  add secq, sec_str
 %endif
 %endif
   dec h
-- 
2.7.4
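
A note for readers less fluent in x86inc assembly: both fixes appear to work
around the small register file of 32-bit x86, where x86inc can give a function
at most seven general-purpose registers and maps further arguments to stack
slots. The sad4d functions stop dedicating a GPR (`one`) to the 0x00010001
constant, dropping the declared register counts from 9/8 to 8/7, and instead
borrow srcq briefly (push/pop) to materialize the constant into m1. The subpel
variance loops pre-double sec_str from an element stride to a byte stride once
at entry (shl sec_str, 1), so each iteration can advance the second-prediction
pointer with a plain "add secq, sec_str"; the old scaled-index forms such as
"lea secq, [secq + sec_str*2]" require the stride to sit in a register, which
a 32-bit build cannot guarantee. The C sketch below illustrates that stride
pattern only; it is not libvpx code, and the function names (avg_round,
avg_pred_scaled, avg_pred_running) are invented for illustration. In C the
uint16_t pointer arithmetic hides the element-to-byte doubling that the asm
does explicitly.

  #include <stddef.h>
  #include <stdint.h>

  /* Rounded 16-bit average, the scalar equivalent of pavgw. */
  static uint16_t avg_round(uint16_t a, uint16_t b) {
    return (uint16_t)((a + b + 1) >> 1);
  }

  /* Old shape: every row rescales the stride, like the removed
   * "pavgw m2, [secq + sec_str*2]" / "lea secq, [secq + sec_str*2]". */
  static void avg_pred_scaled(uint16_t *pred, const uint16_t *sec,
                              int sec_str, int w, int h) {
    for (int row = 0; row < h; ++row) {
      const uint16_t *s = sec + (ptrdiff_t)row * sec_str;  /* scale per row */
      for (int col = 0; col < w; ++col)
        pred[row * w + col] = avg_round(pred[row * w + col], s[col]);
    }
  }

  /* New shape: scale once up front ("shl sec_str, 1") and bump a running
   * pointer ("add secq, sec_str"); no scaled index inside the loop. */
  static void avg_pred_running(uint16_t *pred, const uint16_t *sec,
                               int sec_str, int w, int h) {
    const uint16_t *s = sec;
    for (int row = 0; row < h; ++row) {
      for (int col = 0; col < w; ++col)
        pred[row * w + col] = avg_round(pred[row * w + col], s[col]);
      s += sec_str;  /* plain add per row */
    }
  }

The two-rows-per-pass structure of the %1 < 16 paths is why the patched loops
add sec_str twice per iteration: once between the two pavgw averages and once
at the bottom of the loop.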