2 * Generic binary BCH encoding/decoding library
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License version 2 as published by
6 * the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 51
15 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
17 * Copyright © 2011 Parrot S.A.
19 * Author: Ivan Djelic <ivan.djelic@parrot.com>
23 * This library provides runtime configurable encoding/decoding of binary
24 * Bose-Chaudhuri-Hocquenghem (BCH) codes.
26 * Call bch_init to get a pointer to a newly allocated bch_control structure for
27 * the given m (Galois field order), t (error correction capability) and
28 * (optional) primitive polynomial parameters.
30 * Call bch_encode to compute and store ecc parity bytes to a given buffer.
31 * Call bch_decode to detect and locate errors in received data.
33 * On systems supporting hw BCH features, intermediate results may be provided
34 * to bch_decode in order to skip certain steps. See bch_decode() documentation
37 * Option CONFIG_BCH_CONST_PARAMS can be used to force fixed values of
38 * parameters m and t; thus allowing extra compiler optimizations and providing
39 * better (up to 2x) encoding performance. Using this option makes sense when
40 * (m,t) are fixed and known in advance, e.g. when using BCH error correction
41 * on a particular NAND flash device.
43 * Algorithmic details:
45 * Encoding is performed by processing 32 input bits in parallel, using 4
46 * remainder lookup tables.
48 * The final stage of decoding involves the following internal steps:
49 * a. Syndrome computation
50 * b. Error locator polynomial computation using Berlekamp-Massey algorithm
51 * c. Error locator root finding (by far the most expensive step)
53 * In this implementation, step c is not performed using the usual Chien search.
54 * Instead, an alternative approach described in [1] is used. It consists in
55 * factoring the error locator polynomial using the Berlekamp Trace algorithm
56 * (BTA) down to a certain degree (4), after which ad hoc low-degree polynomial
57 * solving techniques [2] are used. The resulting algorithm, called BTZ, yields
58 * much better performance than Chien search for usual (m,t) values (typically
59 * m >= 13, t < 32, see [1]).
61 * [1] B. Biswas, V. Herbert. Efficient root finding of polynomials over fields
62 * of characteristic 2, in: Western European Workshop on Research in Cryptology
63 * - WEWoRC 2009, Graz, Austria, LNCS, Springer, July 2009, to appear.
64 * [2] [Zin96] V.A. Zinoviev. On the solution of equations of degree 10 over
65 * finite fields GF(2^q). In Rapport de recherche INRIA no 2829, 1996.
68 #include <linux/kernel.h>
69 #include <linux/errno.h>
70 #include <linux/init.h>
71 #include <linux/module.h>
72 #include <linux/slab.h>
73 #include <linux/bitops.h>
74 #include <asm/byteorder.h>
75 #include <linux/bch.h>
77 #if defined(CONFIG_BCH_CONST_PARAMS)
78 #define GF_M(_p) (CONFIG_BCH_CONST_M)
79 #define GF_T(_p) (CONFIG_BCH_CONST_T)
80 #define GF_N(_p) ((1 << (CONFIG_BCH_CONST_M))-1)
81 #define BCH_MAX_M (CONFIG_BCH_CONST_M)
82 #define BCH_MAX_T (CONFIG_BCH_CONST_T)
84 #define GF_M(_p) ((_p)->m)
85 #define GF_T(_p) ((_p)->t)
86 #define GF_N(_p) ((_p)->n)
87 #define BCH_MAX_M 15 /* 2KB */
88 #define BCH_MAX_T 64 /* 64 bit correction */
91 #define BCH_ECC_WORDS(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32)
92 #define BCH_ECC_BYTES(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8)
94 #define BCH_ECC_MAX_WORDS DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 32)
97 #define dbg(_fmt, args...) do {} while (0)
101 * represent a polynomial over GF(2^m)
104 unsigned int deg; /* polynomial degree */
105 unsigned int c[]; /* polynomial terms */
108 /* given its degree, compute a polynomial size in bytes */
109 #define GF_POLY_SZ(_d) (sizeof(struct gf_poly)+((_d)+1)*sizeof(unsigned int))
111 /* polynomial of degree 1 */
112 struct gf_poly_deg1 {
117 static u8 swap_bits_table[] = {
118 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
119 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
120 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
121 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
122 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
123 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
124 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
125 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
126 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
127 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
128 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
129 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
130 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
131 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
132 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
133 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
134 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
135 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
136 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
137 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
138 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
139 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
140 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
141 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
142 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
143 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
144 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
145 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
146 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
147 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
148 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
149 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
152 static u8 swap_bits(struct bch_control *bch, u8 in)
157 return swap_bits_table[in];
161 * same as bch_encode(), but process input data one byte at a time
163 static void bch_encode_unaligned(struct bch_control *bch,
164 const unsigned char *data, unsigned int len,
169 const int l = BCH_ECC_WORDS(bch)-1;
172 u8 tmp = swap_bits(bch, *data++);
174 p = bch->mod8_tab + (l+1)*(((ecc[0] >> 24)^(tmp)) & 0xff);
176 for (i = 0; i < l; i++)
177 ecc[i] = ((ecc[i] << 8)|(ecc[i+1] >> 24))^(*p++);
179 ecc[l] = (ecc[l] << 8)^(*p);
184 * convert ecc bytes to aligned, zero-padded 32-bit ecc words
186 static void load_ecc8(struct bch_control *bch, uint32_t *dst,
189 uint8_t pad[4] = {0, 0, 0, 0};
190 unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
192 for (i = 0; i < nwords; i++, src += 4)
193 dst[i] = ((u32)swap_bits(bch, src[0]) << 24) |
194 ((u32)swap_bits(bch, src[1]) << 16) |
195 ((u32)swap_bits(bch, src[2]) << 8) |
196 swap_bits(bch, src[3]);
198 memcpy(pad, src, BCH_ECC_BYTES(bch)-4*nwords);
199 dst[nwords] = ((u32)swap_bits(bch, pad[0]) << 24) |
200 ((u32)swap_bits(bch, pad[1]) << 16) |
201 ((u32)swap_bits(bch, pad[2]) << 8) |
202 swap_bits(bch, pad[3]);
206 * convert 32-bit ecc words to ecc bytes
208 static void store_ecc8(struct bch_control *bch, uint8_t *dst,
212 unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
214 for (i = 0; i < nwords; i++) {
215 *dst++ = swap_bits(bch, src[i] >> 24);
216 *dst++ = swap_bits(bch, src[i] >> 16);
217 *dst++ = swap_bits(bch, src[i] >> 8);
218 *dst++ = swap_bits(bch, src[i]);
220 pad[0] = swap_bits(bch, src[nwords] >> 24);
221 pad[1] = swap_bits(bch, src[nwords] >> 16);
222 pad[2] = swap_bits(bch, src[nwords] >> 8);
223 pad[3] = swap_bits(bch, src[nwords]);
224 memcpy(dst, pad, BCH_ECC_BYTES(bch)-4*nwords);
228 * bch_encode - calculate BCH ecc parity of data
229 * @bch: BCH control structure
230 * @data: data to encode
231 * @len: data length in bytes
232 * @ecc: ecc parity data, must be initialized by caller
234 * The @ecc parity array is used both as input and output parameter, in order to
235 * allow incremental computations. It should be of the size indicated by member
236 * @ecc_bytes of @bch, and should be initialized to 0 before the first call.
238 * The exact number of computed ecc parity bits is given by member @ecc_bits of
239 * @bch; it may be less than m*t for large values of t.
241 void bch_encode(struct bch_control *bch, const uint8_t *data,
242 unsigned int len, uint8_t *ecc)
244 const unsigned int l = BCH_ECC_WORDS(bch)-1;
245 unsigned int i, mlen;
247 uint32_t w, r[BCH_ECC_MAX_WORDS];
248 const size_t r_bytes = BCH_ECC_WORDS(bch) * sizeof(*r);
249 const uint32_t * const tab0 = bch->mod8_tab;
250 const uint32_t * const tab1 = tab0 + 256*(l+1);
251 const uint32_t * const tab2 = tab1 + 256*(l+1);
252 const uint32_t * const tab3 = tab2 + 256*(l+1);
253 const uint32_t *pdata, *p0, *p1, *p2, *p3;
255 if (WARN_ON(r_bytes > sizeof(r)))
259 /* load ecc parity bytes into internal 32-bit buffer */
260 load_ecc8(bch, bch->ecc_buf, ecc);
262 memset(bch->ecc_buf, 0, r_bytes);
265 /* process first unaligned data bytes */
266 m = ((unsigned long)data) & 3;
268 mlen = (len < (4-m)) ? len : 4-m;
269 bch_encode_unaligned(bch, data, mlen, bch->ecc_buf);
274 /* process 32-bit aligned data words */
275 pdata = (uint32_t *)data;
279 memcpy(r, bch->ecc_buf, r_bytes);
282 * split each 32-bit word into 4 polynomials of weight 8 as follows:
284 * 31 ...24 23 ...16 15 ... 8 7 ... 0
285 * xxxxxxxx yyyyyyyy zzzzzzzz tttttttt
286 * tttttttt mod g = r0 (precomputed)
287 * zzzzzzzz 00000000 mod g = r1 (precomputed)
288 * yyyyyyyy 00000000 00000000 mod g = r2 (precomputed)
289 * xxxxxxxx 00000000 00000000 00000000 mod g = r3 (precomputed)
290 * xxxxxxxx yyyyyyyy zzzzzzzz tttttttt mod g = r0^r1^r2^r3
293 /* input data is read in big-endian format */
294 w = cpu_to_be32(*pdata++);
296 w = (u32)swap_bits(bch, w) |
297 ((u32)swap_bits(bch, w >> 8) << 8) |
298 ((u32)swap_bits(bch, w >> 16) << 16) |
299 ((u32)swap_bits(bch, w >> 24) << 24);
301 p0 = tab0 + (l+1)*((w >> 0) & 0xff);
302 p1 = tab1 + (l+1)*((w >> 8) & 0xff);
303 p2 = tab2 + (l+1)*((w >> 16) & 0xff);
304 p3 = tab3 + (l+1)*((w >> 24) & 0xff);
306 for (i = 0; i < l; i++)
307 r[i] = r[i+1]^p0[i]^p1[i]^p2[i]^p3[i];
309 r[l] = p0[l]^p1[l]^p2[l]^p3[l];
311 memcpy(bch->ecc_buf, r, r_bytes);
313 /* process last unaligned bytes */
315 bch_encode_unaligned(bch, data, len, bch->ecc_buf);
317 /* store ecc parity bytes into original parity buffer */
319 store_ecc8(bch, ecc, bch->ecc_buf);
321 EXPORT_SYMBOL_GPL(bch_encode);
323 static inline int modulo(struct bch_control *bch, unsigned int v)
325 const unsigned int n = GF_N(bch);
328 v = (v & n) + (v >> GF_M(bch));
334 * shorter and faster modulo function, only works when v < 2N.
336 static inline int mod_s(struct bch_control *bch, unsigned int v)
338 const unsigned int n = GF_N(bch);
339 return (v < n) ? v : v-n;
342 static inline int deg(unsigned int poly)
344 /* polynomial degree is the most-significant bit index */
348 static inline int parity(unsigned int x)
351 * public domain code snippet, lifted from
352 * http://www-graphics.stanford.edu/~seander/bithacks.html
356 x = (x & 0x11111111U) * 0x11111111U;
357 return (x >> 28) & 1;
360 /* Galois field basic operations: multiply, divide, inverse, etc. */
362 static inline unsigned int gf_mul(struct bch_control *bch, unsigned int a,
365 return (a && b) ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
366 bch->a_log_tab[b])] : 0;
369 static inline unsigned int gf_sqr(struct bch_control *bch, unsigned int a)
371 return a ? bch->a_pow_tab[mod_s(bch, 2*bch->a_log_tab[a])] : 0;
374 static inline unsigned int gf_div(struct bch_control *bch, unsigned int a,
377 return a ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
378 GF_N(bch)-bch->a_log_tab[b])] : 0;
381 static inline unsigned int gf_inv(struct bch_control *bch, unsigned int a)
383 return bch->a_pow_tab[GF_N(bch)-bch->a_log_tab[a]];
386 static inline unsigned int a_pow(struct bch_control *bch, int i)
388 return bch->a_pow_tab[modulo(bch, i)];
391 static inline int a_log(struct bch_control *bch, unsigned int x)
393 return bch->a_log_tab[x];
396 static inline int a_ilog(struct bch_control *bch, unsigned int x)
398 return mod_s(bch, GF_N(bch)-bch->a_log_tab[x]);
402 * compute 2t syndromes of ecc polynomial, i.e. ecc(a^j) for j=1..2t
404 static void compute_syndromes(struct bch_control *bch, uint32_t *ecc,
410 const int t = GF_T(bch);
414 /* make sure extra bits in last ecc word are cleared */
415 m = ((unsigned int)s) & 31;
417 ecc[s/32] &= ~((1u << (32-m))-1);
418 memset(syn, 0, 2*t*sizeof(*syn));
420 /* compute v(a^j) for j=1 .. 2t-1 */
426 for (j = 0; j < 2*t; j += 2)
427 syn[j] ^= a_pow(bch, (j+1)*(i+s));
433 /* v(a^(2j)) = v(a^j)^2 */
434 for (j = 0; j < t; j++)
435 syn[2*j+1] = gf_sqr(bch, syn[j]);
438 static void gf_poly_copy(struct gf_poly *dst, struct gf_poly *src)
440 memcpy(dst, src, GF_POLY_SZ(src->deg));
443 static int compute_error_locator_polynomial(struct bch_control *bch,
444 const unsigned int *syn)
446 const unsigned int t = GF_T(bch);
447 const unsigned int n = GF_N(bch);
448 unsigned int i, j, tmp, l, pd = 1, d = syn[0];
449 struct gf_poly *elp = bch->elp;
450 struct gf_poly *pelp = bch->poly_2t[0];
451 struct gf_poly *elp_copy = bch->poly_2t[1];
454 memset(pelp, 0, GF_POLY_SZ(2*t));
455 memset(elp, 0, GF_POLY_SZ(2*t));
462 /* use simplified binary Berlekamp-Massey algorithm */
463 for (i = 0; (i < t) && (elp->deg <= t); i++) {
466 gf_poly_copy(elp_copy, elp);
467 /* e[i+1](X) = e[i](X)+di*dp^-1*X^2(i-p)*e[p](X) */
468 tmp = a_log(bch, d)+n-a_log(bch, pd);
469 for (j = 0; j <= pelp->deg; j++) {
471 l = a_log(bch, pelp->c[j]);
472 elp->c[j+k] ^= a_pow(bch, tmp+l);
475 /* compute l[i+1] = max(l[i]->c[l[p]+2*(i-p]) */
477 if (tmp > elp->deg) {
479 gf_poly_copy(pelp, elp_copy);
484 /* di+1 = S(2i+3)+elp[i+1].1*S(2i+2)+...+elp[i+1].lS(2i+3-l) */
487 for (j = 1; j <= elp->deg; j++)
488 d ^= gf_mul(bch, elp->c[j], syn[2*i+2-j]);
491 dbg("elp=%s\n", gf_poly_str(elp));
492 return (elp->deg > t) ? -1 : (int)elp->deg;
496 * solve a m x m linear system in GF(2) with an expected number of solutions,
497 * and return the number of found solutions
499 static int solve_linear_system(struct bch_control *bch, unsigned int *rows,
500 unsigned int *sol, int nsol)
502 const int m = GF_M(bch);
503 unsigned int tmp, mask;
504 int rem, c, r, p, k, param[BCH_MAX_M];
509 /* Gaussian elimination */
510 for (c = 0; c < m; c++) {
513 /* find suitable row for elimination */
514 for (r = p; r < m; r++) {
515 if (rows[r] & mask) {
526 /* perform elimination on remaining rows */
528 for (r = rem; r < m; r++) {
533 /* elimination not needed, store defective row index */
538 /* rewrite system, inserting fake parameter rows */
541 for (r = m-1; r >= 0; r--) {
542 if ((r > m-1-k) && rows[r])
543 /* system has no solution */
546 rows[r] = (p && (r == param[p-1])) ?
547 p--, 1u << (m-r) : rows[r-p];
551 if (nsol != (1 << k))
552 /* unexpected number of solutions */
555 for (p = 0; p < nsol; p++) {
556 /* set parameters for p-th solution */
557 for (c = 0; c < k; c++)
558 rows[param[c]] = (rows[param[c]] & ~1)|((p >> c) & 1);
560 /* compute unique solution */
562 for (r = m-1; r >= 0; r--) {
563 mask = rows[r] & (tmp|1);
564 tmp |= parity(mask) << (m-r);
572 * this function builds and solves a linear system for finding roots of a degree
573 * 4 affine monic polynomial X^4+aX^2+bX+c over GF(2^m).
575 static int find_affine4_roots(struct bch_control *bch, unsigned int a,
576 unsigned int b, unsigned int c,
580 const int m = GF_M(bch);
581 unsigned int mask = 0xff, t, rows[16] = {0,};
587 /* build linear system to solve X^4+aX^2+bX+c = 0 */
588 for (i = 0; i < m; i++) {
589 rows[i+1] = bch->a_pow_tab[4*i]^
590 (a ? bch->a_pow_tab[mod_s(bch, k)] : 0)^
591 (b ? bch->a_pow_tab[mod_s(bch, j)] : 0);
596 * transpose 16x16 matrix before passing it to linear solver
597 * warning: this code assumes m < 16
599 for (j = 8; j != 0; j >>= 1, mask ^= (mask << j)) {
600 for (k = 0; k < 16; k = (k+j+1) & ~j) {
601 t = ((rows[k] >> j)^rows[k+j]) & mask;
606 return solve_linear_system(bch, rows, roots, 4);
610 * compute root r of a degree 1 polynomial over GF(2^m) (returned as log(1/r))
612 static int find_poly_deg1_roots(struct bch_control *bch, struct gf_poly *poly,
618 /* poly[X] = bX+c with c!=0, root=c/b */
619 roots[n++] = mod_s(bch, GF_N(bch)-bch->a_log_tab[poly->c[0]]+
620 bch->a_log_tab[poly->c[1]]);
625 * compute roots of a degree 2 polynomial over GF(2^m)
627 static int find_poly_deg2_roots(struct bch_control *bch, struct gf_poly *poly,
630 int n = 0, i, l0, l1, l2;
631 unsigned int u, v, r;
633 if (poly->c[0] && poly->c[1]) {
635 l0 = bch->a_log_tab[poly->c[0]];
636 l1 = bch->a_log_tab[poly->c[1]];
637 l2 = bch->a_log_tab[poly->c[2]];
639 /* using z=a/bX, transform aX^2+bX+c into z^2+z+u (u=ac/b^2) */
640 u = a_pow(bch, l0+l2+2*(GF_N(bch)-l1));
642 * let u = sum(li.a^i) i=0..m-1; then compute r = sum(li.xi):
643 * r^2+r = sum(li.(xi^2+xi)) = sum(li.(a^i+Tr(a^i).a^k)) =
644 * u + sum(li.Tr(a^i).a^k) = u+a^k.Tr(sum(li.a^i)) = u+a^k.Tr(u)
645 * i.e. r and r+1 are roots iff Tr(u)=0
655 if ((gf_sqr(bch, r)^r) == u) {
656 /* reverse z=a/bX transformation and compute log(1/r) */
657 roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
658 bch->a_log_tab[r]+l2);
659 roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
660 bch->a_log_tab[r^1]+l2);
667 * compute roots of a degree 3 polynomial over GF(2^m)
669 static int find_poly_deg3_roots(struct bch_control *bch, struct gf_poly *poly,
673 unsigned int a, b, c, a2, b2, c2, e3, tmp[4];
676 /* transform polynomial into monic X^3 + a2X^2 + b2X + c2 */
678 c2 = gf_div(bch, poly->c[0], e3);
679 b2 = gf_div(bch, poly->c[1], e3);
680 a2 = gf_div(bch, poly->c[2], e3);
682 /* (X+a2)(X^3+a2X^2+b2X+c2) = X^4+aX^2+bX+c (affine) */
683 c = gf_mul(bch, a2, c2); /* c = a2c2 */
684 b = gf_mul(bch, a2, b2)^c2; /* b = a2b2 + c2 */
685 a = gf_sqr(bch, a2)^b2; /* a = a2^2 + b2 */
687 /* find the 4 roots of this affine polynomial */
688 if (find_affine4_roots(bch, a, b, c, tmp) == 4) {
689 /* remove a2 from final list of roots */
690 for (i = 0; i < 4; i++) {
692 roots[n++] = a_ilog(bch, tmp[i]);
700 * compute roots of a degree 4 polynomial over GF(2^m)
702 static int find_poly_deg4_roots(struct bch_control *bch, struct gf_poly *poly,
706 unsigned int a, b, c, d, e = 0, f, a2, b2, c2, e4;
711 /* transform polynomial into monic X^4 + aX^3 + bX^2 + cX + d */
713 d = gf_div(bch, poly->c[0], e4);
714 c = gf_div(bch, poly->c[1], e4);
715 b = gf_div(bch, poly->c[2], e4);
716 a = gf_div(bch, poly->c[3], e4);
718 /* use Y=1/X transformation to get an affine polynomial */
720 /* first, eliminate cX by using z=X+e with ae^2+c=0 */
722 /* compute e such that e^2 = c/a */
723 f = gf_div(bch, c, a);
725 l += (l & 1) ? GF_N(bch) : 0;
728 * use transformation z=X+e:
729 * z^4+e^4 + a(z^3+ez^2+e^2z+e^3) + b(z^2+e^2) +cz+ce+d
730 * z^4 + az^3 + (ae+b)z^2 + (ae^2+c)z+e^4+be^2+ae^3+ce+d
731 * z^4 + az^3 + (ae+b)z^2 + e^4+be^2+d
732 * z^4 + az^3 + b'z^2 + d'
734 d = a_pow(bch, 2*l)^gf_mul(bch, b, f)^d;
735 b = gf_mul(bch, a, e)^b;
737 /* now, use Y=1/X to get Y^4 + b/dY^2 + a/dY + 1/d */
739 /* assume all roots have multiplicity 1 */
743 b2 = gf_div(bch, a, d);
744 a2 = gf_div(bch, b, d);
746 /* polynomial is already affine */
751 /* find the 4 roots of this affine polynomial */
752 if (find_affine4_roots(bch, a2, b2, c2, roots) == 4) {
753 for (i = 0; i < 4; i++) {
754 /* post-process roots (reverse transformations) */
755 f = a ? gf_inv(bch, roots[i]) : roots[i];
756 roots[i] = a_ilog(bch, f^e);
764 * build monic, log-based representation of a polynomial
766 static void gf_poly_logrep(struct bch_control *bch,
767 const struct gf_poly *a, int *rep)
769 int i, d = a->deg, l = GF_N(bch)-a_log(bch, a->c[a->deg]);
771 /* represent 0 values with -1; warning, rep[d] is not set to 1 */
772 for (i = 0; i < d; i++)
773 rep[i] = a->c[i] ? mod_s(bch, a_log(bch, a->c[i])+l) : -1;
777 * compute polynomial Euclidean division remainder in GF(2^m)[X]
779 static void gf_poly_mod(struct bch_control *bch, struct gf_poly *a,
780 const struct gf_poly *b, int *rep)
783 unsigned int i, j, *c = a->c;
784 const unsigned int d = b->deg;
789 /* reuse or compute log representation of denominator */
792 gf_poly_logrep(bch, b, rep);
795 for (j = a->deg; j >= d; j--) {
797 la = a_log(bch, c[j]);
799 for (i = 0; i < d; i++, p++) {
802 c[p] ^= bch->a_pow_tab[mod_s(bch,
808 while (!c[a->deg] && a->deg)
813 * compute polynomial Euclidean division quotient in GF(2^m)[X]
815 static void gf_poly_div(struct bch_control *bch, struct gf_poly *a,
816 const struct gf_poly *b, struct gf_poly *q)
818 if (a->deg >= b->deg) {
819 q->deg = a->deg-b->deg;
820 /* compute a mod b (modifies a) */
821 gf_poly_mod(bch, a, b, NULL);
822 /* quotient is stored in upper part of polynomial a */
823 memcpy(q->c, &a->c[b->deg], (1+q->deg)*sizeof(unsigned int));
831 * compute polynomial GCD (Greatest Common Divisor) in GF(2^m)[X]
833 static struct gf_poly *gf_poly_gcd(struct bch_control *bch, struct gf_poly *a,
838 dbg("gcd(%s,%s)=", gf_poly_str(a), gf_poly_str(b));
840 if (a->deg < b->deg) {
847 gf_poly_mod(bch, a, b, NULL);
853 dbg("%s\n", gf_poly_str(a));
859 * Given a polynomial f and an integer k, compute Tr(a^kX) mod f
860 * This is used in Berlekamp Trace algorithm for splitting polynomials
862 static void compute_trace_bk_mod(struct bch_control *bch, int k,
863 const struct gf_poly *f, struct gf_poly *z,
866 const int m = GF_M(bch);
869 /* z contains z^2j mod f */
872 z->c[1] = bch->a_pow_tab[k];
875 memset(out, 0, GF_POLY_SZ(f->deg));
877 /* compute f log representation only once */
878 gf_poly_logrep(bch, f, bch->cache);
880 for (i = 0; i < m; i++) {
881 /* add a^(k*2^i)(z^(2^i) mod f) and compute (z^(2^i) mod f)^2 */
882 for (j = z->deg; j >= 0; j--) {
883 out->c[j] ^= z->c[j];
884 z->c[2*j] = gf_sqr(bch, z->c[j]);
887 if (z->deg > out->deg)
892 /* z^(2(i+1)) mod f = (z^(2^i) mod f)^2 mod f */
893 gf_poly_mod(bch, z, f, bch->cache);
896 while (!out->c[out->deg] && out->deg)
899 dbg("Tr(a^%d.X) mod f = %s\n", k, gf_poly_str(out));
903 * factor a polynomial using Berlekamp Trace algorithm (BTA)
905 static void factor_polynomial(struct bch_control *bch, int k, struct gf_poly *f,
906 struct gf_poly **g, struct gf_poly **h)
908 struct gf_poly *f2 = bch->poly_2t[0];
909 struct gf_poly *q = bch->poly_2t[1];
910 struct gf_poly *tk = bch->poly_2t[2];
911 struct gf_poly *z = bch->poly_2t[3];
914 dbg("factoring %s...\n", gf_poly_str(f));
919 /* tk = Tr(a^k.X) mod f */
920 compute_trace_bk_mod(bch, k, f, z, tk);
923 /* compute g = gcd(f, tk) (destructive operation) */
925 gcd = gf_poly_gcd(bch, f2, tk);
926 if (gcd->deg < f->deg) {
927 /* compute h=f/gcd(f,tk); this will modify f and q */
928 gf_poly_div(bch, f, gcd, q);
929 /* store g and h in-place (clobbering f) */
930 *h = &((struct gf_poly_deg1 *)f)[gcd->deg].poly;
931 gf_poly_copy(*g, gcd);
938 * find roots of a polynomial, using BTZ algorithm; see the beginning of this
941 static int find_poly_roots(struct bch_control *bch, unsigned int k,
942 struct gf_poly *poly, unsigned int *roots)
945 struct gf_poly *f1, *f2;
948 /* handle low degree polynomials with ad hoc techniques */
950 cnt = find_poly_deg1_roots(bch, poly, roots);
953 cnt = find_poly_deg2_roots(bch, poly, roots);
956 cnt = find_poly_deg3_roots(bch, poly, roots);
959 cnt = find_poly_deg4_roots(bch, poly, roots);
962 /* factor polynomial using Berlekamp Trace Algorithm (BTA) */
964 if (poly->deg && (k <= GF_M(bch))) {
965 factor_polynomial(bch, k, poly, &f1, &f2);
967 cnt += find_poly_roots(bch, k+1, f1, roots);
969 cnt += find_poly_roots(bch, k+1, f2, roots+cnt);
976 #if defined(USE_CHIEN_SEARCH)
978 * exhaustive root search (Chien) implementation - not used, included only for
979 * reference/comparison tests
981 static int chien_search(struct bch_control *bch, unsigned int len,
982 struct gf_poly *p, unsigned int *roots)
985 unsigned int i, j, syn, syn0, count = 0;
986 const unsigned int k = 8*len+bch->ecc_bits;
988 /* use a log-based representation of polynomial */
989 gf_poly_logrep(bch, p, bch->cache);
990 bch->cache[p->deg] = 0;
991 syn0 = gf_div(bch, p->c[0], p->c[p->deg]);
993 for (i = GF_N(bch)-k+1; i <= GF_N(bch); i++) {
994 /* compute elp(a^i) */
995 for (j = 1, syn = syn0; j <= p->deg; j++) {
998 syn ^= a_pow(bch, m+j*i);
1001 roots[count++] = GF_N(bch)-i;
1002 if (count == p->deg)
1006 return (count == p->deg) ? count : 0;
1008 #define find_poly_roots(_p, _k, _elp, _loc) chien_search(_p, len, _elp, _loc)
1009 #endif /* USE_CHIEN_SEARCH */
1012 * bch_decode - decode received codeword and find bit error locations
1013 * @bch: BCH control structure
1014 * @data: received data, ignored if @calc_ecc is provided
1015 * @len: data length in bytes, must always be provided
1016 * @recv_ecc: received ecc, if NULL then assume it was XORed in @calc_ecc
1017 * @calc_ecc: calculated ecc, if NULL then calc_ecc is computed from @data
1018 * @syn: hw computed syndrome data (if NULL, syndrome is calculated)
1019 * @errloc: output array of error locations
1022 * The number of errors found, or -EBADMSG if decoding failed, or -EINVAL if
1023 * invalid parameters were provided
1025 * Depending on the available hw BCH support and the need to compute @calc_ecc
1026 * separately (using bch_encode()), this function should be called with one of
1027 * the following parameter configurations -
1029 * by providing @data and @recv_ecc only:
1030 * bch_decode(@bch, @data, @len, @recv_ecc, NULL, NULL, @errloc)
1032 * by providing @recv_ecc and @calc_ecc:
1033 * bch_decode(@bch, NULL, @len, @recv_ecc, @calc_ecc, NULL, @errloc)
1035 * by providing ecc = recv_ecc XOR calc_ecc:
1036 * bch_decode(@bch, NULL, @len, NULL, ecc, NULL, @errloc)
1038 * by providing syndrome results @syn:
1039 * bch_decode(@bch, NULL, @len, NULL, NULL, @syn, @errloc)
1041 * Once bch_decode() has successfully returned with a positive value, error
1042 * locations returned in array @errloc should be interpreted as follows -
1044 * if (errloc[n] >= 8*len), then n-th error is located in ecc (no need for
1047 * if (errloc[n] < 8*len), then n-th error is located in data and can be
1048 * corrected with statement data[errloc[n]/8] ^= 1 << (errloc[n] % 8);
1050 * Note that this function does not perform any data correction by itself, it
1051 * merely indicates error locations.
1053 int bch_decode(struct bch_control *bch, const uint8_t *data, unsigned int len,
1054 const uint8_t *recv_ecc, const uint8_t *calc_ecc,
1055 const unsigned int *syn, unsigned int *errloc)
1057 const unsigned int ecc_words = BCH_ECC_WORDS(bch);
1062 /* sanity check: make sure data length can be handled */
1063 if (8*len > (bch->n-bch->ecc_bits))
1066 /* if caller does not provide syndromes, compute them */
1069 /* compute received data ecc into an internal buffer */
1070 if (!data || !recv_ecc)
1072 bch_encode(bch, data, len, NULL);
1074 /* load provided calculated ecc */
1075 load_ecc8(bch, bch->ecc_buf, calc_ecc);
1077 /* load received ecc or assume it was XORed in calc_ecc */
1079 load_ecc8(bch, bch->ecc_buf2, recv_ecc);
1080 /* XOR received and calculated ecc */
1081 for (i = 0, sum = 0; i < (int)ecc_words; i++) {
1082 bch->ecc_buf[i] ^= bch->ecc_buf2[i];
1083 sum |= bch->ecc_buf[i];
1086 /* no error found */
1089 compute_syndromes(bch, bch->ecc_buf, bch->syn);
1093 err = compute_error_locator_polynomial(bch, syn);
1095 nroots = find_poly_roots(bch, 1, bch->elp, errloc);
1100 /* post-process raw error locations for easier correction */
1101 nbits = (len*8)+bch->ecc_bits;
1102 for (i = 0; i < err; i++) {
1103 if (errloc[i] >= nbits) {
1107 errloc[i] = nbits-1-errloc[i];
1108 if (!bch->swap_bits)
1109 errloc[i] = (errloc[i] & ~7) |
1110 (7-(errloc[i] & 7));
1113 return (err >= 0) ? err : -EBADMSG;
1115 EXPORT_SYMBOL_GPL(bch_decode);
1118 * generate Galois field lookup tables
1120 static int build_gf_tables(struct bch_control *bch, unsigned int poly)
1122 unsigned int i, x = 1;
1123 const unsigned int k = 1 << deg(poly);
1125 /* primitive polynomial must be of degree m */
1126 if (k != (1u << GF_M(bch)))
1129 for (i = 0; i < GF_N(bch); i++) {
1130 bch->a_pow_tab[i] = x;
1131 bch->a_log_tab[x] = i;
1133 /* polynomial is not primitive (a^i=1 with 0<i<2^m-1) */
1139 bch->a_pow_tab[GF_N(bch)] = 1;
1140 bch->a_log_tab[0] = 0;
1146 * compute generator polynomial remainder tables for fast encoding
1148 static void build_mod8_tables(struct bch_control *bch, const uint32_t *g)
1151 uint32_t data, hi, lo, *tab;
1152 const int l = BCH_ECC_WORDS(bch);
1153 const int plen = DIV_ROUND_UP(bch->ecc_bits+1, 32);
1154 const int ecclen = DIV_ROUND_UP(bch->ecc_bits, 32);
1156 memset(bch->mod8_tab, 0, 4*256*l*sizeof(*bch->mod8_tab));
1158 for (i = 0; i < 256; i++) {
1159 /* p(X)=i is a small polynomial of weight <= 8 */
1160 for (b = 0; b < 4; b++) {
1161 /* we want to compute (p(X).X^(8*b+deg(g))) mod g(X) */
1162 tab = bch->mod8_tab + (b*256+i)*l;
1166 /* subtract X^d.g(X) from p(X).X^(8*b+deg(g)) */
1167 data ^= g[0] >> (31-d);
1168 for (j = 0; j < ecclen; j++) {
1169 hi = (d < 31) ? g[j] << (d+1) : 0;
1171 g[j+1] >> (31-d) : 0;
1180 * build a base for factoring degree 2 polynomials
1182 static int build_deg2_base(struct bch_control *bch)
1184 const int m = GF_M(bch);
1186 unsigned int sum, x, y, remaining, ak = 0, xi[BCH_MAX_M];
1188 /* find k s.t. Tr(a^k) = 1 and 0 <= k < m */
1189 for (i = 0; i < m; i++) {
1190 for (j = 0, sum = 0; j < m; j++)
1191 sum ^= a_pow(bch, i*(1 << j));
1194 ak = bch->a_pow_tab[i];
1198 /* find xi, i=0..m-1 such that xi^2+xi = a^i+Tr(a^i).a^k */
1200 memset(xi, 0, sizeof(xi));
1202 for (x = 0; (x <= GF_N(bch)) && remaining; x++) {
1203 y = gf_sqr(bch, x)^x;
1204 for (i = 0; i < 2; i++) {
1206 if (y && (r < m) && !xi[r]) {
1210 dbg("x%d = %x\n", r, x);
1216 /* should not happen but check anyway */
1217 return remaining ? -1 : 0;
1220 static void *bch_alloc(size_t size, int *err)
1224 ptr = kmalloc(size, GFP_KERNEL);
1231 * compute generator polynomial for given (m,t) parameters.
1233 static uint32_t *compute_generator_polynomial(struct bch_control *bch)
1235 const unsigned int m = GF_M(bch);
1236 const unsigned int t = GF_T(bch);
1238 unsigned int i, j, nbits, r, word, *roots;
1242 g = bch_alloc(GF_POLY_SZ(m*t), &err);
1243 roots = bch_alloc((bch->n+1)*sizeof(*roots), &err);
1244 genpoly = bch_alloc(DIV_ROUND_UP(m*t+1, 32)*sizeof(*genpoly), &err);
1252 /* enumerate all roots of g(X) */
1253 memset(roots , 0, (bch->n+1)*sizeof(*roots));
1254 for (i = 0; i < t; i++) {
1255 for (j = 0, r = 2*i+1; j < m; j++) {
1257 r = mod_s(bch, 2*r);
1260 /* build generator polynomial g(X) */
1263 for (i = 0; i < GF_N(bch); i++) {
1265 /* multiply g(X) by (X+root) */
1266 r = bch->a_pow_tab[i];
1268 for (j = g->deg; j > 0; j--)
1269 g->c[j] = gf_mul(bch, g->c[j], r)^g->c[j-1];
1271 g->c[0] = gf_mul(bch, g->c[0], r);
1275 /* store left-justified binary representation of g(X) */
1280 nbits = (n > 32) ? 32 : n;
1281 for (j = 0, word = 0; j < nbits; j++) {
1283 word |= 1u << (31-j);
1285 genpoly[i++] = word;
1288 bch->ecc_bits = g->deg;
1298 * bch_init - initialize a BCH encoder/decoder
1299 * @m: Galois field order, should be in the range 5-15
1300 * @t: maximum error correction capability, in bits
1301 * @prim_poly: user-provided primitive polynomial (or 0 to use default)
1302 * @swap_bits: swap bits within data and syndrome bytes
1305 * a newly allocated BCH control structure if successful, NULL otherwise
1307 * This initialization can take some time, as lookup tables are built for fast
1308 * encoding/decoding; make sure not to call this function from a time critical
1309 * path. Usually, bch_init() should be called on module/driver init and
1310 * bch_free() should be called to release memory on exit.
1312 * You may provide your own primitive polynomial of degree @m in argument
1313 * @prim_poly, or let bch_init() use its default polynomial.
1315 * Once bch_init() has successfully returned a pointer to a newly allocated
1316 * BCH control structure, ecc length in bytes is given by member @ecc_bytes of
1319 struct bch_control *bch_init(int m, int t, unsigned int prim_poly,
1323 unsigned int i, words;
1325 struct bch_control *bch = NULL;
1327 const int min_m = 5;
1329 /* default primitive polynomials */
1330 static const unsigned int prim_poly_tab[] = {
1331 0x25, 0x43, 0x83, 0x11d, 0x211, 0x409, 0x805, 0x1053, 0x201b,
1335 #if defined(CONFIG_BCH_CONST_PARAMS)
1336 if ((m != (CONFIG_BCH_CONST_M)) || (t != (CONFIG_BCH_CONST_T))) {
1337 printk(KERN_ERR "bch encoder/decoder was configured to support "
1338 "parameters m=%d, t=%d only!\n",
1339 CONFIG_BCH_CONST_M, CONFIG_BCH_CONST_T);
1343 if ((m < min_m) || (m > BCH_MAX_M))
1345 * values of m greater than 15 are not currently supported;
1346 * supporting m > 15 would require changing table base type
1347 * (uint16_t) and a small patch in matrix transposition
1353 * we can support larger than 64 bits if necessary, at the
1354 * cost of higher stack usage.
1359 if ((t < 1) || (m*t >= ((1 << m)-1)))
1360 /* invalid t value */
1363 /* select a primitive polynomial for generating GF(2^m) */
1365 prim_poly = prim_poly_tab[m-min_m];
1367 bch = kzalloc(sizeof(*bch), GFP_KERNEL);
1373 bch->n = (1 << m)-1;
1374 words = DIV_ROUND_UP(m*t, 32);
1375 bch->ecc_bytes = DIV_ROUND_UP(m*t, 8);
1376 bch->a_pow_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_pow_tab), &err);
1377 bch->a_log_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_log_tab), &err);
1378 bch->mod8_tab = bch_alloc(words*1024*sizeof(*bch->mod8_tab), &err);
1379 bch->ecc_buf = bch_alloc(words*sizeof(*bch->ecc_buf), &err);
1380 bch->ecc_buf2 = bch_alloc(words*sizeof(*bch->ecc_buf2), &err);
1381 bch->xi_tab = bch_alloc(m*sizeof(*bch->xi_tab), &err);
1382 bch->syn = bch_alloc(2*t*sizeof(*bch->syn), &err);
1383 bch->cache = bch_alloc(2*t*sizeof(*bch->cache), &err);
1384 bch->elp = bch_alloc((t+1)*sizeof(struct gf_poly_deg1), &err);
1385 bch->swap_bits = swap_bits;
1387 for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1388 bch->poly_2t[i] = bch_alloc(GF_POLY_SZ(2*t), &err);
1393 err = build_gf_tables(bch, prim_poly);
1397 /* use generator polynomial for computing encoding tables */
1398 genpoly = compute_generator_polynomial(bch);
1399 if (genpoly == NULL)
1402 build_mod8_tables(bch, genpoly);
1405 err = build_deg2_base(bch);
1415 EXPORT_SYMBOL_GPL(bch_init);
1418 * bch_free - free the BCH control structure
1419 * @bch: BCH control structure to release
1421 void bch_free(struct bch_control *bch)
1426 kfree(bch->a_pow_tab);
1427 kfree(bch->a_log_tab);
1428 kfree(bch->mod8_tab);
1429 kfree(bch->ecc_buf);
1430 kfree(bch->ecc_buf2);
1436 for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1437 kfree(bch->poly_2t[i]);
1442 EXPORT_SYMBOL_GPL(bch_free);
1444 MODULE_LICENSE("GPL");
1445 MODULE_AUTHOR("Ivan Djelic <ivan.djelic@parrot.com>");
1446 MODULE_DESCRIPTION("Binary BCH encoder/decoder");