1 dnl Alpha ev67 mpn_hamdist -- mpn hamming distance.
3 dnl Copyright 2003, 2005 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
7 dnl The GNU MP Library is free software; you can redistribute it and/or
8 dnl modify it under the terms of the GNU Lesser General Public License as
9 dnl published by the Free Software Foundation; either version 3 of the
10 dnl License, or (at your option) any later version.
12 dnl The GNU MP Library is distributed in the hope that it will be useful,
13 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
14 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 dnl Lesser General Public License for more details.
17 dnl You should have received a copy of the GNU Lesser General Public License
18 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
20 include(`../config.m4')
23 C ev67: 2.5 cycles/limb
26 C unsigned long mpn_hamdist (mp_srcptr xp, mp_srcptr yp, mp_size_t size);
28 C The hope was for 2.0 c/l here, but that isn't achieved. We're limited by
29 C renaming register shortage. Since we need 5 instructions per limb, further
30 C unrolling could approach 1.25 c/l (5 instructions at up to 4 issued per cycle).
32 C The main loop processes two limbs from each operand on each iteration. An
33 C odd size is handled by processing xp[0]^yp[0] at the start. If the size
34 C is even that result is discarded, and is repeated by the main loop.
C Entry sequence.  Register use, per the notes further down: r16 = xp,
C r17 = yp, r18 = size on entry; r0 holds the running total.
C The U0/U1/L0/L1 tags on each line are presumably the intended ev67 issue
C pipes -- TODO confirm against the 21264 scheduling rules.
C
C The first limb of each operand is loaded unconditionally; its xor is only
C kept when size is odd (see the cmoveq below).
44 ldq r1, 0(r16) C L0 xp[0]
45 ldq r2, 0(r17) C L1 yp[0]
C r8 = size & 1, then r18 becomes the number of limb pairs (size >> 1).
46 and r18, 1, r8 C U1 1 if size odd
47 srl r18, 1, r18 C U0 size, limb pairs
49 clr r0 C L0 initial total
C s8addq computes r8*8 + pointer, so each pointer steps past the first
C 8-byte limb only when r8 is 1 (odd size) and is unchanged when r8 is 0.
50 s8addq r8, r17, r17 C U1 yp++ if size odd
51 s8addq r8, r16, r16 C L1 xp++ if size odd
C r6 is the "xor 1" slot; starting it at zero means it has no set bits yet.
52 clr r6 C U0 dummy initial xor 1
C r5 = xp[0] ^ yp[0], the "xor 0" slot.
54 xor r1, r2, r5 C L initial xor 0
C r18 == 0 means there are no limb pairs.  The note below reads this as
C size==1; size==0 would take the same branch -- presumably callers
C guarantee size >= 1, verify.
55 beq r18, L(one) C U if size==1
C r31 reads as zero, so for an even size (r8 == 0) r5 is cleared; the
C pointers were not advanced in that case, so the first limb pair is left
C for the main loop to process.
57 cmoveq r8, r31, r5 C L discard first limb if size even
C Register roles in the main loop:
63 C r0 total accumulating
66 C r16 xp, incrementing
67 C r17 yp, incrementing
68 C r18 size, limb pairs, decrementing
C Loads targeting r31 discard the data; these only touch memory 256 bytes
C beyond each operand pointer.
80 ldl r31, 256(r16) C L prefetch
81 ldl r31, 256(r17) C L prefetch
C Return through r26.  The accumulated total is in r0, presumably the
C return-value register under the usual Alpha calling convention -- confirm.
97 ret r31, (r26), 1 C L0