1 // SPDX-License-Identifier: GPL-2.0
3 * linux/fs/hfsplus/unicode.c
6 * Brad Boyer (flar@allandria.com)
7 * (C) 2003 Ardis Technologies <roman@ardistech.com>
9 * Handler routines for unicode strings
12 #include <linux/types.h>
13 #include <linux/nls.h>
14 #include "hfsplus_fs.h"
15 #include "hfsplus_raw.h"
17 /* Fold the case of a unicode char, given the 16 bit value */
18 /* Returns folded char, or 0 if ignorable */
19 static inline u16 case_fold(u16 c)
/* First level: the high byte of c selects an entry of hfsplus_case_fold_table. */
23 tmp = hfsplus_case_fold_table[c >> 8];
/* Second level: that entry is a base offset, indexed by the low byte of c. */
25 tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
31 /* Compare unicode strings, return values like normal strcmp */
/* Characters are compared after case folding with case_fold(). */
32 int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
33 const struct hfsplus_unistr *s2)
35 u16 len1, len2, c1, c2;
36 const hfsplus_unichr *p1, *p2;
/* The length field is stored as a 16-bit big-endian value; convert to CPU order. */
38 len1 = be16_to_cpu(s1->length);
39 len2 = be16_to_cpu(s2->length);
/*
 * Each character is converted to CPU byte order and then folded;
 * case_fold() yields 0 for characters that are ignorable.
 */
47 c1 = case_fold(be16_to_cpu(*p1));
52 c2 = case_fold(be16_to_cpu(*p2));
/* Differing folded characters decide the ordering: -1 if c1 is smaller, else 1. */
58 return (c1 < c2) ? -1 : 1;
64 /* Compare names as a sequence of 16-bit unsigned integers */
/* Unlike hfsplus_strcasecmp(), the characters are not passed through case_fold(). */
65 int hfsplus_strcmp(const struct hfsplus_unistr *s1,
66 const struct hfsplus_unistr *s2)
68 u16 len1, len2, c1, c2;
69 const hfsplus_unichr *p1, *p2;
/* The length field is stored as a 16-bit big-endian value; convert to CPU order. */
72 len1 = be16_to_cpu(s1->length);
73 len2 = be16_to_cpu(s2->length);
/* Only the common prefix, min(len1, len2) characters, is compared here. */
77 for (len = min(len1, len2); len > 0; len--) {
78 c1 = be16_to_cpu(*p1);
79 c2 = be16_to_cpu(*p2);
/* Differing characters decide the ordering: -1 if c1 is smaller, else 1. */
81 return c1 < c2 ? -1 : 1;
/* Length comparison: s1 being shorter than s2 gives -1. */
86 return len1 < len2 ? -1 :
/*
 * Constants for the arithmetic Hangul composition in hfsplus_uni2asc() and
 * the decomposition in hfsplus_try_decompose_hangul().
 */
/* Code point subtracted to obtain the index of a precomposed Hangul character. */
91 #define Hangul_SBase 0xac00
/* Base code points added to the l, v and t components of a decomposition. */
92 #define Hangul_LBase 0x1100
93 #define Hangul_VBase 0x1161
94 #define Hangul_TBase 0x11a7
/* Number of valid precomposed indexes: 19 * 21 * 28 = 11172. */
95 #define Hangul_SCount 11172
96 #define Hangul_LCount 19
97 #define Hangul_VCount 21
98 #define Hangul_TCount 28
/* Number of (v, t) combinations per l value: 21 * 28 = 588. */
99 #define Hangul_NCount (Hangul_VCount * Hangul_TCount)
/*
 * Look up cc in the table p. Entries are read as pairs of u16 values:
 * p[2 * k] is the key compared against cc and p[2 * k + 1] is an offset
 * into hfsplus_compose_table.
 */
102 static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
/* Early rejection: e is zero, or cc lies outside the keys at positions s and e. */
108 if (!e || cc < p[s * 2] || cc > p[e * 2])
/* NOTE(review): presumably one step of a binary search over the keys -- confirm. */
114 else if (cc < p[i * 2])
/* The result points into hfsplus_compose_table at the offset stored next to the key. */
117 return hfsplus_compose_table + p[i * 2 + 1];
/*
 * Convert the HFS+ unicode string ustr into astr using the NLS table of the
 * superblock sb. The distance from astr to the output cursor op is stored
 * through len_p.
 */
122 int hfsplus_uni2asc(struct super_block *sb,
123 const struct hfsplus_unistr *ustr,
124 char *astr, int *len_p)
126 const hfsplus_unichr *ip;
127 struct nls_table *nls = HFSPLUS_SB(sb)->nls;
131 int i, len, ustrlen, res, compose;
/* The length field is stored as a 16-bit big-endian value. */
135 ustrlen = be16_to_cpu(ustr->length);
/* Composition is enabled unless the HFSPLUS_SB_NODECOMPOSE flag is set. */
138 compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
140 while (ustrlen > 0) {
141 c0 = be16_to_cpu(*ip++);
143 /* search for single decomposed char */
145 ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
151 /* start of a possibly decomposed Hangul char */
/* The next character is rebased against Hangul_VBase and range-checked. */
156 c1 = be16_to_cpu(*ip) - Hangul_VBase;
157 if (c1 < Hangul_VCount) {
158 /* compose the Hangul char */
/* cc = ((c0 - Hangul_LBase) * Hangul_VCount + c1) * Hangul_TCount */
159 cc = (c0 - Hangul_LBase) * Hangul_VCount;
160 cc = (cc + c1) * Hangul_TCount;
/*
 * The following character is rebased against Hangul_TBase; c1 > 0 excludes
 * Hangul_TBase itself.
 */
166 c1 = be16_to_cpu(*ip) - Hangul_TBase;
167 if (c1 > 0 && c1 < Hangul_TCount) {
176 /* main loop for common case of not composed chars */
179 c1 = be16_to_cpu(*ip);
181 ce1 = hfsplus_compose_lookup(
182 hfsplus_compose_table, c1);
/* Emit c0 through the NLS table; -ENAMETOOLONG is checked explicitly. */
193 res = nls->uni2char(c0, op, len);
195 if (res == -ENAMETOOLONG)
/* The lookup continues in the sub-table returned by the previous lookup. */
206 ce2 = hfsplus_compose_lookup(ce1, c0);
209 while (i < ustrlen) {
210 ce1 = hfsplus_compose_lookup(ce2,
/* Emit the character cc through the NLS table; same error check as above. */
236 res = nls->uni2char(cc, op, len);
238 if (res == -ENAMETOOLONG)
/* Report how far the output cursor advanced from the start of astr. */
248 *len_p = (char *)op - astr;
253 * Convert one or more ASCII characters into a single unicode character.
254 * Returns the number of ASCII characters corresponding to the unicode char.
256 static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
/* size receives the return value of the char2uni() callback of sb's NLS table. */
259 int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
275 /* Decomposes a non-Hangul unicode character. */
/*
 * hfsplus_decompose_table is walked as a multi-level table: each level is
 * indexed by four bits of uc, from bits 15..12 down to bits 3..0.
 */
276 static u16 *hfsplus_decompose_nonhangul(wchar_t uc, int *size)
/* Level 1: bits 15..12 of uc. */
280 off = hfsplus_decompose_table[(uc >> 12) & 0xf];
/* The offsets 0 and 0xffff are special values that get separate handling. */
281 if (off == 0 || off == 0xffff)
/* Level 2: bits 11..8 of uc, relative to the offset found so far. */
284 off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
/* Level 3: bits 7..4 of uc. */
288 off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
/* Level 4: bits 3..0 of uc. */
292 off = hfsplus_decompose_table[off + (uc & 0xf)];
/* The result points into the same table at position off / 4. */
296 return hfsplus_decompose_table + (off / 4);
300 * Try to decompose a unicode character as Hangul. Return 0 if @uc is not
301 * precomposed Hangul, otherwise return the length of the decomposition.
303 * This function was adapted from sample code from the Unicode Standard
304 * Annex #15: Unicode Normalization Forms, version 3.2.0.
306 * Copyright (C) 1991-2018 Unicode, Inc. All rights reserved. Distributed
307 * under the Terms of Use in http://www.unicode.org/copyright.html.
309 static int hfsplus_try_decompose_hangul(wchar_t uc, u16 *result)
/* index is the position of uc relative to Hangul_SBase. */
314 index = uc - Hangul_SBase;
/* Only indexes in [0, Hangul_SCount) are accepted. */
315 if (index < 0 || index >= Hangul_SCount)
/*
 * Split the index into its three components: l changes every
 * Hangul_NCount indexes, v every Hangul_TCount indexes, and t with
 * every index.
 */
318 l = Hangul_LBase + index / Hangul_NCount;
319 v = Hangul_VBase + (index % Hangul_NCount) / Hangul_TCount;
320 t = Hangul_TBase + index % Hangul_TCount;
/* t equals Hangul_TBase exactly when index % Hangul_TCount is zero. */
324 if (t != Hangul_TBase) {
331 /* Decomposes a single unicode character. */
/* The decomposition length is stored through size. */
332 static u16 *decompose_unichar(wchar_t uc, int *size, u16 *hangul_buffer)
336 /* Hangul is handled separately */
/* The Hangul attempt writes its output into the caller-supplied hangul_buffer. */
337 result = hangul_buffer;
338 *size = hfsplus_try_decompose_hangul(uc, result);
/*
 * Table-based decomposition.
 * NOTE(review): presumably used only when the Hangul attempt found
 * nothing -- confirm.
 */
340 result = hfsplus_decompose_nonhangul(uc, size);
/*
 * Convert the len bytes at astr into the HFS+ unicode string ustr, writing
 * at most max_unistr_len characters. Characters are decomposed unless the
 * HFSPLUS_SB_NODECOMPOSE flag is set on sb.
 */
344 int hfsplus_asc2uni(struct super_block *sb,
345 struct hfsplus_unistr *ustr, int max_unistr_len,
346 const char *astr, int len)
348 int size, dsize, decompose;
349 u16 *dstr, outlen = 0;
353 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
/* Continue while there is room in the output and input left to consume. */
354 while (outlen < max_unistr_len && len > 0) {
355 size = asc2unichar(sb, astr, len, &c);
358 dstr = decompose_unichar(c, &dsize, dhangul);
/* Check that the whole decomposition still fits in the output. */
362 if (outlen + dsize > max_unistr_len)
/* Copy the dsize decomposed characters, stored as big-endian values. */
365 ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
366 } while (--dsize > 0);
/* Store the character c itself, without decomposition. */
368 ustr->unicode[outlen++] = cpu_to_be16(c);
/* Record the number of characters written, as a big-endian value. */
373 ustr->length = cpu_to_be16(outlen);
375 return -ENAMETOOLONG;
380 * Hash a string to an integer as appropriate for the HFS+ filesystem.
381 * Composed unicode characters are decomposed and case-folding is performed
382 * if the appropriate bits are (un)set on the superblock.
384 int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
386 struct super_block *sb = dentry->d_sb;
389 int casefold, decompose, size, len;
/* Both options are read from the superblock flags; decompose is the negated flag. */
395 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
396 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
397 hash = init_name_hash(dentry);
/* Convert the next character of the name with the mount's NLS table. */
402 size = asc2unichar(sb, astr, len, &c);
407 dstr = decompose_unichar(c, &dsize, dhangul);
/* Each character of the decomposition is mixed into the running hash. */
416 hash = partial_name_hash(c2, hash);
417 } while (--dsize > 0);
423 hash = partial_name_hash(c2, hash);
/* Finalize the hash and store it in the qstr. */
426 str->hash = end_name_hash(hash);
432 * Compare strings with HFS+ filename ordering.
433 * Composed unicode characters are decomposed and case-folding is performed
434 * if the appropriate bits are (un)set on the superblock.
436 int hfsplus_compare_dentry(const struct dentry *dentry,
437 unsigned int len, const char *str, const struct qstr *name)
439 struct super_block *sb = dentry->d_sb;
440 int casefold, decompose, size;
441 int dsize1, dsize2, len1, len2;
442 const u16 *dstr1, *dstr2;
443 const char *astr1, *astr2;
446 u16 dhangul_1[3], dhangul_2[3];
448 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
449 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
455 dstr1 = dstr2 = NULL;
457 while (len1 > 0 && len2 > 0) {
459 size = asc2unichar(sb, astr1, len1, &c);
464 dstr1 = decompose_unichar(c, &dsize1,
466 if (!decompose || !dstr1) {
474 size = asc2unichar(sb, astr2, len2, &c);
479 dstr2 = decompose_unichar(c, &dsize2,
481 if (!decompose || !dstr2) {