Add a trailing ':' to labels. Though the colon is optional with the tools
we support, it is more common to use one to mark a label. This also quiets
the orphan-labels warning with nasm/yasm.
BUG=b/29583530
Change-Id: I46e95255e12026dd542d9838e2dd3fbddf7b56e2
mov rcx, 16 ; loop count
pxor xmm6, xmm6
-.combine
+.combine:
movdqa xmm2, [rax]
movdqa xmm4, [rdx]
add rax, rsi
mov rcx, 8 ; loop count
pxor xmm4, xmm4
-.combine
+.combine:
movq xmm2, [rax]
movq xmm3, [rdx]
add rax, rsi
; Because we're working with the actual output frames
; we can't depend on any kind of data alignment.
-.accumulate
+.accumulate:
movdqa xmm0, [rax] ; src1
movdqa xmm1, [rdx] ; src2
add rax, rcx ; src1 + stride1
lea rdi, [rdi+rdx]
movq mm1, QWORD ptr[rdi] ; first row
mov rcx, 8
-.init_borderd ; initialize borders
+.init_borderd: ; initialize borders
lea rdi, [rdi + rax]
movq [rdi], mm1
mov rdi, rsi
movq mm1, QWORD ptr[rdi] ; first row
mov rcx, 8
-.init_border ; initialize borders
+.init_border: ; initialize borders
lea rdi, [rdi + rax]
movq [rdi], mm1
movd mm1, DWORD PTR [rsp+rcx*4] ;d[rcx*4]
movd [rsi], mm1
-.skip_assignment
+.skip_assignment:
lea rsi, [rsi+rax]
lea rdi, [rdi+rax]
UPDATE_FLIMIT
jmp .acrossnextcol
-.acrossdone
+.acrossdone:
; last 16 pixels
movq QWORD PTR [rdi+rdx-16], mm0
lea rdi, [rdi+rdx]
movq xmm1, QWORD ptr[rdi] ; first row
mov rcx, 8
-.init_borderd ; initialize borders
+.init_borderd: ; initialize borders
lea rdi, [rdi + rax]
movq [rdi], xmm1
mov rdi, rsi
movq xmm1, QWORD ptr[rdi] ; first row
mov rcx, 8
-.init_border ; initialize borders
+.init_border: ; initialize borders
lea rdi, [rdi + rax]
movq [rdi], xmm1
movq mm0, [rsp + rcx*8] ;d[rcx*8]
movq [rsi], mm0
-.skip_assignment
+.skip_assignment:
lea rsi, [rsi+rax]
lea rdi, [rdi+rax]
mov rcx, 16 ; loop count
pxor xmm6, xmm6
-.combine
+.combine:
movdqa xmm2, [rax]
movdqa xmm4, [rdx]
add rax, rsi
mov rcx, 8 ; loop count
pxor xmm4, xmm4
-.combine
+.combine:
movq xmm2, [rax]
movq xmm3, [rdx]
add rax, rsi
; Because we're working with the actual output frames
; we can't depend on any kind of data alignment.
-.accumulate
+.accumulate:
movdqa xmm0, [rax] ; src1
movdqa xmm1, [rdx] ; src2
add rax, rcx ; src1 + stride1
psubw m0, m2 ; t1-tl t2-tl ... t8-tl [word]
movq m2, [leftq]
punpcklbw m2, m1 ; l1 l2 l3 l4 l5 l6 l7 l8 [word]
-.loop
+.loop:
pshuflw m4, m2, 0x0 ; [63:0] l1 l1 l1 l1 [word]
pshuflw m3, m2, 0x55 ; [63:0] l2 l2 l2 l2 [word]
punpcklqdq m4, m4 ; l1 l1 l1 l1 l1 l1 l1 l1 [word]
RET
%endif
-.w64
+.w64:
mov r4d, dword hm
.loop64:
movu m0, [srcq]
pavgb m1, m0
%endif
movd [dstq], m1
-.done
+.done:
RET
%endm