From 9b4b854b8e6fd07dd85f81329921adf61f43b5c9 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Fri, 6 Jun 2008 10:57:06 -0700
Subject: [PATCH] core: do aligned transfers in bcopy32

Always align the destination in transfers in bcopy32.  We should also
do this in the various other implementations, especially in com32.
---
 core/bcopy32.inc | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 104 insertions(+), 19 deletions(-)

diff --git a/core/bcopy32.inc b/core/bcopy32.inc
index fd14409..8f36d64 100644
--- a/core/bcopy32.inc
+++ b/core/bcopy32.inc
@@ -94,13 +94,14 @@ bcopy_gdt_size:	equ $-bcopy_gdt
 ;	 ESI	- first byte after source (garbage if ESI == -1 on entry)
 ;	 EDI	- first byte after target
 ;
-bcopy:		pushad
+bcopy:		jecxz .ret
+		pushad
 		push word pm_bcopy
 		call simple_pm_call
 		popad
 		add edi,ecx
 		add esi,ecx
-		ret
+.ret:		ret
 
 ;
 ; This routine is used to invoke a simple routine in 16-bit protected
@@ -203,6 +204,10 @@ simple_pm_call:
 ; pm_bcopy:
 ;
 ;	This is the protected-mode core of the "bcopy" routine.
+;	Try to do aligned transfers; if the src and dst are relatively
+;	misaligned, align the dst.
+;
+;	ECX is guaranteed to not be zero on entry.
 ;
 pm_bcopy:
 		cmp esi,-1
@@ -212,44 +217,124 @@ pm_bcopy:
 		jb .reverse		; have to copy backwards
 
 .forward:
+		; Initial alignment
+		mov dx,di
+		shr dx,1
+		jnc .faa1
+		a32 movsb
+		dec ecx
+.faa1:
+		mov al,cl
+		cmp ecx,2
+		jb .f_tiny
+
+		shr dx,1
+		jnc .faa2
+		a32 movsw
+		sub ecx,2
+.faa2:
+
+		; Bulk transfer
 		mov al,cl		; Save low bits
-		and al,3
 		shr ecx,2		; Convert to dwords
 		a32 rep movsd		; Do our business
 		; At this point ecx == 0
-		mov cl,al		; Copy any fractional dword
-		a32 rep movsb
+		test al,2
+		jz .fab2
+		a32 movsw
+.fab2:
+.f_tiny:
+		test al,1
+		jz .fab1
+		a32 movsb
+.fab1:
 		ret
 
 .reverse:
 		std			; Reverse copy
+		lea esi,[esi+ecx-1]	; Point to final byte
 		lea edi,[edi+ecx-1]
-		mov eax,ecx
-		and ecx,3
-		shr eax,2
-		a32 rep movsb
-
-		; Change ESI/EDI to point to the last dword, instead
-		; of the last byte.
-		sub esi,3
-		sub edi,3
-		mov ecx,eax
+
+		; Initial alignment
+		mov dx,di
+		shr dx,1
+		jnc .raa1
+		a32 movsb
+		dec ecx
+.raa1:
+
+		dec esi
+		dec edi
+		mov al,cl
+		cmp ecx,2
+		jb .r_tiny
+		shr dx,1
+		jnc .raa2
+		a32 movsw
+		sub ecx,2
+.raa2:
+
+		; Bulk copy
+		sub esi,2
+		sub edi,2
+		mov al,cl		; Save low bits
+		shr ecx,2
 		a32 rep movsd
 
+		; Final alignment
+.r_final:
+		add esi,2
+		add edi,2
+		test al,2
+		jz .rab2
+		a32 movsw
+.rab2:
+.r_tiny:
+		inc esi
+		inc edi
+		test al,1
+		jz .rab1
+		a32 movsb
+.rab1:
 		cld
 		ret
 
 .bzero:
 		xor eax,eax
-		mov si,cx		; Save low bits
-		and si,3
+
+		; Initial alignment
+		mov dx,di
+		shr dx,1
+		jnc .zaa1
+		a32 stosb
+		dec ecx
+.zaa1:
+
+		mov bl,cl
+		cmp ecx,2
+		jb .z_tiny
+		shr dx,1
+		jnc .zaa2
+		a32 stosw
+		sub ecx,2
+.zaa2:
+
+		; Bulk
+		mov bl,cl		; Save low bits
+		shr ecx,2
 		a32 rep stosd
-		mov cx,si		; Write fractional dword
-		a32 rep stosb
+		test bl,2
+		jz .zab2
+		a32 stosw
+.zab2:
+.z_tiny:
+		test bl,1
+		jz .zab1
+		a32 stosb
+.zab1:
 		ret
 
 ;
-- 
2.7.4
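
For reference, the destination-alignment strategy used by pm_bcopy's forward
path above can be sketched in C roughly as follows.  This is illustrative
only and not part of the patch; the function name bcopy_forward_sketch and
its layout are hypothetical.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void bcopy_forward_sketch(void *dst, const void *src, size_t n)
{
	uint8_t *d = dst;
	const uint8_t *s = src;

	if (n == 0)
		return;			/* mirrors the jecxz .ret short-circuit */

	/* Initial alignment: copy one byte if the destination is odd... */
	if ((uintptr_t)d & 1) {
		*d++ = *s++;
		n--;
	}
	/* ...and one 16-bit word if it is still not 4-byte aligned */
	if (n >= 2 && ((uintptr_t)d & 2)) {
		memcpy(d, s, 2);
		d += 2; s += 2; n -= 2;
	}

	/* Bulk transfer, four bytes at a time (the a32 rep movsd) */
	size_t tail = n & 3;
	for (size_t i = n >> 2; i; i--) {
		memcpy(d, s, 4);
		d += 4; s += 4;
	}

	/* Final word/byte, like the test al,2 / test al,1 epilogue */
	if (tail & 2) {
		memcpy(d, s, 2);
		d += 2; s += 2;
	}
	if (tail & 1)
		*d = *s;
}

The reverse (overlapping) and bzero paths in the patch follow the same idea,
differing only in copy direction and in storing a zero instead of loading
from a source.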