MIPS: Add missing EHB in mtc0 -> mfc0 sequence.
[platform/kernel/linux-rpi.git] / lib / xxhash.c
1 /*
2  * xxHash - Extremely Fast Hash algorithm
3  * Copyright (C) 2012-2016, Yann Collet.
4  *
5  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  *   * Redistributions of source code must retain the above copyright
12  *     notice, this list of conditions and the following disclaimer.
13  *   * Redistributions in binary form must reproduce the above
14  *     copyright notice, this list of conditions and the following disclaimer
15  *     in the documentation and/or other materials provided with the
16  *     distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * This program is free software; you can redistribute it and/or modify it under
31  * the terms of the GNU General Public License version 2 as published by the
32  * Free Software Foundation. This program is dual-licensed; you may select
33  * either version 2 of the GNU General Public License ("GPL") or BSD license
34  * ("BSD").
35  *
36  * You can contact the author at:
37  * - xxHash homepage: http://cyan4973.github.io/xxHash/
38  * - xxHash source repository: https://github.com/Cyan4973/xxHash
39  */
40
41 #include <asm/unaligned.h>
42 #include <linux/errno.h>
43 #include <linux/compiler.h>
44 #include <linux/kernel.h>
45 #include <linux/module.h>
46 #include <linux/string.h>
47 #include <linux/xxhash.h>
48
49 /*-*************************************
50  * Macros
51  **************************************/
52 #define xxh_rotl32(x, r) ((x << r) | (x >> (32 - r)))
53 #define xxh_rotl64(x, r) ((x << r) | (x >> (64 - r)))
54
55 #ifdef __LITTLE_ENDIAN
56 # define XXH_CPU_LITTLE_ENDIAN 1
57 #else
58 # define XXH_CPU_LITTLE_ENDIAN 0
59 #endif
60
61 /*-*************************************
62  * Constants
63  **************************************/
64 static const uint32_t PRIME32_1 = 2654435761U;
65 static const uint32_t PRIME32_2 = 2246822519U;
66 static const uint32_t PRIME32_3 = 3266489917U;
67 static const uint32_t PRIME32_4 =  668265263U;
68 static const uint32_t PRIME32_5 =  374761393U;
69
70 static const uint64_t PRIME64_1 = 11400714785074694791ULL;
71 static const uint64_t PRIME64_2 = 14029467366897019727ULL;
72 static const uint64_t PRIME64_3 =  1609587929392839161ULL;
73 static const uint64_t PRIME64_4 =  9650029242287828579ULL;
74 static const uint64_t PRIME64_5 =  2870177450012600261ULL;
75
76 /*-**************************
77  *  Utils
78  ***************************/
79 void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src)
80 {
81         memcpy(dst, src, sizeof(*dst));
82 }
83 EXPORT_SYMBOL(xxh32_copy_state);
84
85 void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src)
86 {
87         memcpy(dst, src, sizeof(*dst));
88 }
89 EXPORT_SYMBOL(xxh64_copy_state);
90
91 /*-***************************
92  * Simple Hash Functions
93  ****************************/
94 static uint32_t xxh32_round(uint32_t seed, const uint32_t input)
95 {
96         seed += input * PRIME32_2;
97         seed = xxh_rotl32(seed, 13);
98         seed *= PRIME32_1;
99         return seed;
100 }
101
102 uint32_t xxh32(const void *input, const size_t len, const uint32_t seed)
103 {
104         const uint8_t *p = (const uint8_t *)input;
105         const uint8_t *b_end = p + len;
106         uint32_t h32;
107
108         if (len >= 16) {
109                 const uint8_t *const limit = b_end - 16;
110                 uint32_t v1 = seed + PRIME32_1 + PRIME32_2;
111                 uint32_t v2 = seed + PRIME32_2;
112                 uint32_t v3 = seed + 0;
113                 uint32_t v4 = seed - PRIME32_1;
114
115                 do {
116                         v1 = xxh32_round(v1, get_unaligned_le32(p));
117                         p += 4;
118                         v2 = xxh32_round(v2, get_unaligned_le32(p));
119                         p += 4;
120                         v3 = xxh32_round(v3, get_unaligned_le32(p));
121                         p += 4;
122                         v4 = xxh32_round(v4, get_unaligned_le32(p));
123                         p += 4;
124                 } while (p <= limit);
125
126                 h32 = xxh_rotl32(v1, 1) + xxh_rotl32(v2, 7) +
127                         xxh_rotl32(v3, 12) + xxh_rotl32(v4, 18);
128         } else {
129                 h32 = seed + PRIME32_5;
130         }
131
132         h32 += (uint32_t)len;
133
134         while (p + 4 <= b_end) {
135                 h32 += get_unaligned_le32(p) * PRIME32_3;
136                 h32 = xxh_rotl32(h32, 17) * PRIME32_4;
137                 p += 4;
138         }
139
140         while (p < b_end) {
141                 h32 += (*p) * PRIME32_5;
142                 h32 = xxh_rotl32(h32, 11) * PRIME32_1;
143                 p++;
144         }
145
146         h32 ^= h32 >> 15;
147         h32 *= PRIME32_2;
148         h32 ^= h32 >> 13;
149         h32 *= PRIME32_3;
150         h32 ^= h32 >> 16;
151
152         return h32;
153 }
154 EXPORT_SYMBOL(xxh32);
155
156 static uint64_t xxh64_round(uint64_t acc, const uint64_t input)
157 {
158         acc += input * PRIME64_2;
159         acc = xxh_rotl64(acc, 31);
160         acc *= PRIME64_1;
161         return acc;
162 }
163
164 static uint64_t xxh64_merge_round(uint64_t acc, uint64_t val)
165 {
166         val = xxh64_round(0, val);
167         acc ^= val;
168         acc = acc * PRIME64_1 + PRIME64_4;
169         return acc;
170 }
171
172 uint64_t xxh64(const void *input, const size_t len, const uint64_t seed)
173 {
174         const uint8_t *p = (const uint8_t *)input;
175         const uint8_t *const b_end = p + len;
176         uint64_t h64;
177
178         if (len >= 32) {
179                 const uint8_t *const limit = b_end - 32;
180                 uint64_t v1 = seed + PRIME64_1 + PRIME64_2;
181                 uint64_t v2 = seed + PRIME64_2;
182                 uint64_t v3 = seed + 0;
183                 uint64_t v4 = seed - PRIME64_1;
184
185                 do {
186                         v1 = xxh64_round(v1, get_unaligned_le64(p));
187                         p += 8;
188                         v2 = xxh64_round(v2, get_unaligned_le64(p));
189                         p += 8;
190                         v3 = xxh64_round(v3, get_unaligned_le64(p));
191                         p += 8;
192                         v4 = xxh64_round(v4, get_unaligned_le64(p));
193                         p += 8;
194                 } while (p <= limit);
195
196                 h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) +
197                         xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18);
198                 h64 = xxh64_merge_round(h64, v1);
199                 h64 = xxh64_merge_round(h64, v2);
200                 h64 = xxh64_merge_round(h64, v3);
201                 h64 = xxh64_merge_round(h64, v4);
202
203         } else {
204                 h64  = seed + PRIME64_5;
205         }
206
207         h64 += (uint64_t)len;
208
209         while (p + 8 <= b_end) {
210                 const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p));
211
212                 h64 ^= k1;
213                 h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4;
214                 p += 8;
215         }
216
217         if (p + 4 <= b_end) {
218                 h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1;
219                 h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
220                 p += 4;
221         }
222
223         while (p < b_end) {
224                 h64 ^= (*p) * PRIME64_5;
225                 h64 = xxh_rotl64(h64, 11) * PRIME64_1;
226                 p++;
227         }
228
229         h64 ^= h64 >> 33;
230         h64 *= PRIME64_2;
231         h64 ^= h64 >> 29;
232         h64 *= PRIME64_3;
233         h64 ^= h64 >> 32;
234
235         return h64;
236 }
237 EXPORT_SYMBOL(xxh64);
238
239 /*-**************************************************
240  * Advanced Hash Functions
241  ***************************************************/
242 void xxh32_reset(struct xxh32_state *statePtr, const uint32_t seed)
243 {
244         /* use a local state for memcpy() to avoid strict-aliasing warnings */
245         struct xxh32_state state;
246
247         memset(&state, 0, sizeof(state));
248         state.v1 = seed + PRIME32_1 + PRIME32_2;
249         state.v2 = seed + PRIME32_2;
250         state.v3 = seed + 0;
251         state.v4 = seed - PRIME32_1;
252         memcpy(statePtr, &state, sizeof(state));
253 }
254 EXPORT_SYMBOL(xxh32_reset);
255
256 void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed)
257 {
258         /* use a local state for memcpy() to avoid strict-aliasing warnings */
259         struct xxh64_state state;
260
261         memset(&state, 0, sizeof(state));
262         state.v1 = seed + PRIME64_1 + PRIME64_2;
263         state.v2 = seed + PRIME64_2;
264         state.v3 = seed + 0;
265         state.v4 = seed - PRIME64_1;
266         memcpy(statePtr, &state, sizeof(state));
267 }
268 EXPORT_SYMBOL(xxh64_reset);
269
270 int xxh32_update(struct xxh32_state *state, const void *input, const size_t len)
271 {
272         const uint8_t *p = (const uint8_t *)input;
273         const uint8_t *const b_end = p + len;
274
275         if (input == NULL)
276                 return -EINVAL;
277
278         state->total_len_32 += (uint32_t)len;
279         state->large_len |= (len >= 16) | (state->total_len_32 >= 16);
280
281         if (state->memsize + len < 16) { /* fill in tmp buffer */
282                 memcpy((uint8_t *)(state->mem32) + state->memsize, input, len);
283                 state->memsize += (uint32_t)len;
284                 return 0;
285         }
286
287         if (state->memsize) { /* some data left from previous update */
288                 const uint32_t *p32 = state->mem32;
289
290                 memcpy((uint8_t *)(state->mem32) + state->memsize, input,
291                         16 - state->memsize);
292
293                 state->v1 = xxh32_round(state->v1, get_unaligned_le32(p32));
294                 p32++;
295                 state->v2 = xxh32_round(state->v2, get_unaligned_le32(p32));
296                 p32++;
297                 state->v3 = xxh32_round(state->v3, get_unaligned_le32(p32));
298                 p32++;
299                 state->v4 = xxh32_round(state->v4, get_unaligned_le32(p32));
300                 p32++;
301
302                 p += 16-state->memsize;
303                 state->memsize = 0;
304         }
305
306         if (p <= b_end - 16) {
307                 const uint8_t *const limit = b_end - 16;
308                 uint32_t v1 = state->v1;
309                 uint32_t v2 = state->v2;
310                 uint32_t v3 = state->v3;
311                 uint32_t v4 = state->v4;
312
313                 do {
314                         v1 = xxh32_round(v1, get_unaligned_le32(p));
315                         p += 4;
316                         v2 = xxh32_round(v2, get_unaligned_le32(p));
317                         p += 4;
318                         v3 = xxh32_round(v3, get_unaligned_le32(p));
319                         p += 4;
320                         v4 = xxh32_round(v4, get_unaligned_le32(p));
321                         p += 4;
322                 } while (p <= limit);
323
324                 state->v1 = v1;
325                 state->v2 = v2;
326                 state->v3 = v3;
327                 state->v4 = v4;
328         }
329
330         if (p < b_end) {
331                 memcpy(state->mem32, p, (size_t)(b_end-p));
332                 state->memsize = (uint32_t)(b_end-p);
333         }
334
335         return 0;
336 }
337 EXPORT_SYMBOL(xxh32_update);
338
339 uint32_t xxh32_digest(const struct xxh32_state *state)
340 {
341         const uint8_t *p = (const uint8_t *)state->mem32;
342         const uint8_t *const b_end = (const uint8_t *)(state->mem32) +
343                 state->memsize;
344         uint32_t h32;
345
346         if (state->large_len) {
347                 h32 = xxh_rotl32(state->v1, 1) + xxh_rotl32(state->v2, 7) +
348                         xxh_rotl32(state->v3, 12) + xxh_rotl32(state->v4, 18);
349         } else {
350                 h32 = state->v3 /* == seed */ + PRIME32_5;
351         }
352
353         h32 += state->total_len_32;
354
355         while (p + 4 <= b_end) {
356                 h32 += get_unaligned_le32(p) * PRIME32_3;
357                 h32 = xxh_rotl32(h32, 17) * PRIME32_4;
358                 p += 4;
359         }
360
361         while (p < b_end) {
362                 h32 += (*p) * PRIME32_5;
363                 h32 = xxh_rotl32(h32, 11) * PRIME32_1;
364                 p++;
365         }
366
367         h32 ^= h32 >> 15;
368         h32 *= PRIME32_2;
369         h32 ^= h32 >> 13;
370         h32 *= PRIME32_3;
371         h32 ^= h32 >> 16;
372
373         return h32;
374 }
375 EXPORT_SYMBOL(xxh32_digest);
376
377 int xxh64_update(struct xxh64_state *state, const void *input, const size_t len)
378 {
379         const uint8_t *p = (const uint8_t *)input;
380         const uint8_t *const b_end = p + len;
381
382         if (input == NULL)
383                 return -EINVAL;
384
385         state->total_len += len;
386
387         if (state->memsize + len < 32) { /* fill in tmp buffer */
388                 memcpy(((uint8_t *)state->mem64) + state->memsize, input, len);
389                 state->memsize += (uint32_t)len;
390                 return 0;
391         }
392
393         if (state->memsize) { /* tmp buffer is full */
394                 uint64_t *p64 = state->mem64;
395
396                 memcpy(((uint8_t *)p64) + state->memsize, input,
397                         32 - state->memsize);
398
399                 state->v1 = xxh64_round(state->v1, get_unaligned_le64(p64));
400                 p64++;
401                 state->v2 = xxh64_round(state->v2, get_unaligned_le64(p64));
402                 p64++;
403                 state->v3 = xxh64_round(state->v3, get_unaligned_le64(p64));
404                 p64++;
405                 state->v4 = xxh64_round(state->v4, get_unaligned_le64(p64));
406
407                 p += 32 - state->memsize;
408                 state->memsize = 0;
409         }
410
411         if (p + 32 <= b_end) {
412                 const uint8_t *const limit = b_end - 32;
413                 uint64_t v1 = state->v1;
414                 uint64_t v2 = state->v2;
415                 uint64_t v3 = state->v3;
416                 uint64_t v4 = state->v4;
417
418                 do {
419                         v1 = xxh64_round(v1, get_unaligned_le64(p));
420                         p += 8;
421                         v2 = xxh64_round(v2, get_unaligned_le64(p));
422                         p += 8;
423                         v3 = xxh64_round(v3, get_unaligned_le64(p));
424                         p += 8;
425                         v4 = xxh64_round(v4, get_unaligned_le64(p));
426                         p += 8;
427                 } while (p <= limit);
428
429                 state->v1 = v1;
430                 state->v2 = v2;
431                 state->v3 = v3;
432                 state->v4 = v4;
433         }
434
435         if (p < b_end) {
436                 memcpy(state->mem64, p, (size_t)(b_end-p));
437                 state->memsize = (uint32_t)(b_end - p);
438         }
439
440         return 0;
441 }
442 EXPORT_SYMBOL(xxh64_update);
443
444 uint64_t xxh64_digest(const struct xxh64_state *state)
445 {
446         const uint8_t *p = (const uint8_t *)state->mem64;
447         const uint8_t *const b_end = (const uint8_t *)state->mem64 +
448                 state->memsize;
449         uint64_t h64;
450
451         if (state->total_len >= 32) {
452                 const uint64_t v1 = state->v1;
453                 const uint64_t v2 = state->v2;
454                 const uint64_t v3 = state->v3;
455                 const uint64_t v4 = state->v4;
456
457                 h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) +
458                         xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18);
459                 h64 = xxh64_merge_round(h64, v1);
460                 h64 = xxh64_merge_round(h64, v2);
461                 h64 = xxh64_merge_round(h64, v3);
462                 h64 = xxh64_merge_round(h64, v4);
463         } else {
464                 h64  = state->v3 + PRIME64_5;
465         }
466
467         h64 += (uint64_t)state->total_len;
468
469         while (p + 8 <= b_end) {
470                 const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p));
471
472                 h64 ^= k1;
473                 h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4;
474                 p += 8;
475         }
476
477         if (p + 4 <= b_end) {
478                 h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1;
479                 h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
480                 p += 4;
481         }
482
483         while (p < b_end) {
484                 h64 ^= (*p) * PRIME64_5;
485                 h64 = xxh_rotl64(h64, 11) * PRIME64_1;
486                 p++;
487         }
488
489         h64 ^= h64 >> 33;
490         h64 *= PRIME64_2;
491         h64 ^= h64 >> 29;
492         h64 *= PRIME64_3;
493         h64 ^= h64 >> 32;
494
495         return h64;
496 }
497 EXPORT_SYMBOL(xxh64_digest);
498
499 MODULE_LICENSE("Dual BSD/GPL");
500 MODULE_DESCRIPTION("xxHash");