This avoids potential "bleeding" when size == 0 as cache line would be
dirtied (and possibly fetched from other cores) and due to the same
reaons more optimal too.
Signed-off-by: Vineet Gupta <vgupta@kernel.org>
#endif
ENTRY_CFI(memset)
- PREFETCHW_INSTR r0, 0 ; Prefetch the first write location
mov.f 0, r2
;;; if size is zero
jz.d [blink]
mov r3, r0 ; don't clobber ret val
+ PREFETCHW_INSTR r0, 0 ; Prefetch the first write location
+
;;; if length < 8
brls.d.nt r2, 8, .Lsmallchunk
mov.f lp_count,r2