include/charset.h

   1 /* SPDX-License-Identifier: GPL-2.0+ */
   2 /*
   3  *  charset conversion utils
   4  *
   5  *  Copyright (c) 2017 Rob Clark
   6  */
   7
   8 #ifndef __CHARSET_H_
   9 #define __CHARSET_H_
  10
  11 #include <linux/kernel.h>
  12 #include <linux/types.h>
  13
  14 #define MAX_UTF8_PER_UTF16 3 /* max utf8 characters per utf16 character */
  15
  16 /**
  17  * console_read_unicode() - read Unicode code point from console
  18  *
  19  * @code:       pointer to store Unicode code point
  20  * Return:      0 = success
  21  */
  22 int console_read_unicode(s32 *code);
  23
  24 /**
  25  * utf8_get() - get next UTF-8 code point from buffer
  26  *
  27  * @src:                pointer to current byte, updated to point to next byte
  28  * Return:              code point, or 0 for end of string, or -1 if no legal
  29  *                      code point is found. In case of an error src points to
  30  *                      the incorrect byte.
  31  */
  32 s32 utf8_get(const char **src);
  33
  34 /**
  35  * utf8_put() - write UTF-8 code point to buffer
  36  *
  37  * @code:               code point
  38  * @dst:                pointer to destination buffer, updated to next position
  39  * Return:              -1 if the input parameters are invalid
  40  */
  41 int utf8_put(s32 code, char **dst);
  42
  43 /**
  44  * utf8_utf16_strnlen() - length of a truncated utf-8 string after conversion
  45  *                        to utf-16
  46  *
  47  * @src:                utf-8 string
  48  * @count:              maximum number of code points to convert
  49  * Return:              length in u16 after conversion to utf-16 without the
  50  *                      trailing \0. If an invalid UTF-8 sequence is hit one
  51  *                      u16 will be reserved for a replacement character.
  52  */
  53 size_t utf8_utf16_strnlen(const char *src, size_t count);
  54
  55 /**
  56  * utf8_utf16_strlen() - length of a utf-8 string after conversion to utf-16
  57  *
  58  * @a:                  utf-8 string (no code point limit, count is SIZE_MAX)
  59  * Return:              length in u16 after conversion to utf-16 without the
  60  *                      trailing \0. If an invalid UTF-8 sequence is hit one
  61  *                      u16 will be reserved for a replacement character.
  62  */
  63 #define utf8_utf16_strlen(a) utf8_utf16_strnlen((a), SIZE_MAX)
  64
  65 /**
  66  * utf8_utf16_strncpy() - copy utf-8 string to utf-16 string
  67  *
  68  * @dst:                destination buffer
  69  * @src:                source buffer
  70  * @count:              maximum number of code points to copy
  71  * Return:              -1 if the input parameters are invalid
  72  */
  73 int utf8_utf16_strncpy(u16 **dst, const char *src, size_t count);
  74
  75 /**
  76  * utf8_utf16_strcpy() - copy utf-8 string to utf-16 string
  77  *
  78  * @d:                  address of the destination pointer (u16 **)
  79  * @s:                  source buffer (copy limit is SIZE_MAX code points)
  80  * Return:              -1 if the input parameters are invalid
  81  */
  82 #define utf8_utf16_strcpy(d, s) utf8_utf16_strncpy((d), (s), SIZE_MAX)
  83
  84 /**
  85  * utf16_get() - get next UTF-16 code point from buffer
  86  *
  87  * @src:                pointer to current word, updated to point to next word
  88  * Return:              code point, or 0 for end of string, or -1 if no legal
  89  *                      code point is found. In case of an error src points to
  90  *                      the incorrect word.
  91  */
  92 s32 utf16_get(const u16 **src);
  93
  94 /**
  95  * utf16_put() - write UTF-16 code point to buffer
  96  *
  97  * @code:               code point
  98  * @dst:                pointer to destination buffer, updated to next position
  99  * Return:              -1 if the input parameters are invalid
 100  */
 101 int utf16_put(s32 code, u16 **dst);
 102
 103 /**
 104  * utf16_strnlen() - length of a truncated utf-16 string
 105  *
 106  * @src:                utf-16 string
 107  * @count:              maximum number of code points to count
 108  * Return:              length in code points. If an invalid UTF-16 sequence is
 109  *                      hit one position will be reserved for a replacement
 110  *                      character.
 111  */
 112 size_t utf16_strnlen(const u16 *src, size_t count);
 113
 114 /**
 115  * utf16_utf8_strnlen() - length of a truncated utf-16 string after conversion
 116  *                        to utf-8
 117  *
 118  * @src:                utf-16 string
 119  * @count:              maximum number of code points to convert
 120  * Return:              length in bytes after conversion to utf-8 without the
 121  *                      trailing \0. If an invalid UTF-16 sequence is hit one
 122  *                      byte will be reserved for a replacement character.
 123  */
 124 size_t utf16_utf8_strnlen(const u16 *src, size_t count);
 125
 126 /**
 127  * utf16_utf8_strlen() - length of a utf-16 string after conversion to utf-8
 128  *
 129  * @a:                  utf-16 string (no code point limit, count is SIZE_MAX)
 130  * Return:              length in bytes after conversion to utf-8 without the
 131  *                      trailing \0. If an invalid UTF-16 sequence is hit one
 132  *                      byte will be reserved for a replacement character.
 133  */
 134 #define utf16_utf8_strlen(a) utf16_utf8_strnlen((a), SIZE_MAX)
 135
 136 /**
 137  * utf16_utf8_strncpy() - copy utf-16 string to utf-8 string
 138  *
 139  * @dst:                destination buffer
 140  * @src:                source buffer
 141  * @count:              maximum number of code points to copy
 142  * Return:              -1 if the input parameters are invalid
 143  */
 144 int utf16_utf8_strncpy(char **dst, const u16 *src, size_t count);
 145
 146 /**
 147  * utf16_utf8_strcpy() - copy utf-16 string to utf-8 string
 148  *
 149  * @d:                  address of the destination pointer (char **)
 150  * @s:                  source buffer (copy limit is SIZE_MAX code points)
 151  * Return:              -1 if the input parameters are invalid
 152  */
 153 #define utf16_utf8_strcpy(d, s) utf16_utf8_strncpy((d), (s), SIZE_MAX)
 154
 155 /**
 156  * utf_to_lower() - convert a Unicode letter to lower case
 157  *
 158  * @code:               letter to convert
 159  * Return:              lower case letter or unchanged letter
 160  */
 161 s32 utf_to_lower(const s32 code);
 162
 163 /**
 164  * utf_to_upper() - convert a Unicode letter to upper case
 165  *
 166  * @code:               letter to convert
 167  * Return:              upper case letter or unchanged letter
 168  */
 169 s32 utf_to_upper(const s32 code);
 170
 171 /**
 172  * u16_strncmp() - compare two u16 strings
 173  *
 174  * @s1:         first string to compare
 175  * @s2:         second string to compare
 176  * @n:          maximum number of u16 to compare
 177  * Return:      0  if the first n u16 are the same in s1 and s2
 178  *              < 0 if the first different u16 in s1 is less than the
 179  *              corresponding u16 in s2
 180  *              > 0 if the first different u16 in s1 is greater than the
 181  *              corresponding u16 in s2
 182  */
 183 int u16_strncmp(const u16 *s1, const u16 *s2, size_t n);
 184
 185 /**
 186  * u16_strcmp() - compare two u16 strings
 187  *
 188  * @s1:         first string to compare
 189  * @s2:         second string to compare
 190  * Return:      0  if all u16 are the same in s1 and s2 (limit is SIZE_MAX)
 191  *              < 0 if the first different u16 in s1 is less than the
 192  *              corresponding u16 in s2
 193  *              > 0 if the first different u16 in s1 is greater than the
 194  *              corresponding u16 in s2
 195  */
 196 #define u16_strcmp(s1, s2)      u16_strncmp((s1), (s2), SIZE_MAX)
 197
 198 /**
 199  * u16_strlen() - count non-zero words
 200  *
 201  * This function matches wcslen() if the -fshort-wchar compiler flag is set.
 202  * In the EFI context we explicitly need a function handling u16 strings.
 203  *
 204  * @in:                 null terminated u16 string
 205  * Return:              number of non-zero words.
 206  *                      This is not the number of utf-16 letters!
 207  */
 208 size_t u16_strlen(const void *in);
 209
 210 /**
 211  * u16_strsize() - count size of u16 string in bytes including the null
 212  *                 character
 213  *
 214  * Counts the number of bytes occupied by a u16 string
 215  *
 216  * @in:                 null terminated u16 string
 217  * Return:              bytes in a u16 string
 218  */
 219 size_t u16_strsize(const void *in);
 220
 221 /**
 222  * u16_strnlen() - count non-zero words
 223  *
 224  * This function matches wcsnlen_s() if the -fshort-wchar compiler flag is set.
 225  * In the EFI context we explicitly need a function handling u16 strings.
 226  *
 227  * @in:                 null terminated u16 string
 228  * @count:              maximum number of words to count
 229  * Return:              number of non-zero words.
 230  *                      This is not the number of utf-16 letters!
 231  */
 232 size_t u16_strnlen(const u16 *in, size_t count);
 233
 234 /**
 235  * u16_strcpy() - copy u16 string
 236  *
 237  * Copy u16 string pointed to by src, including terminating null word, to
 238  * the buffer pointed to by dest.
 239  *
 240  * @dest:               destination buffer
 241  * @src:                source buffer (null terminated)
 242  * Return:              'dest' address
 243  */
 244 u16 *u16_strcpy(u16 *dest, const u16 *src);
 245
 246 /**
 247  * u16_strdup() - duplicate u16 string
 248  *
 249  * Copy u16 string pointed to by src, including terminating null word, to a
 250  * newly allocated buffer.
 251  *
 252  * @src:                source buffer (null terminated)
 253  * Return:              allocated new buffer on success, NULL on failure
 254  */
 255 u16 *u16_strdup(const void *src);
 256
 257 /**
 258  * utf16_to_utf8() - Convert a utf16 string to utf8
 259  *
 260  * Converts 'size' characters of the utf16 string 'src' to utf8
 261  * written to the 'dest' buffer.
 262  *
 263  * NOTE that a single utf16 character can generate up to 3 utf8
 264  * characters.  See MAX_UTF8_PER_UTF16.
 265  *
 266  * @dest:       the destination buffer to write the utf8 characters
 267  * @src:        the source utf16 string
 268  * @size:       the number of utf16 characters to convert
 269  * Return:      the pointer to the first unwritten byte in 'dest'
 270  */
 271 uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size);
 272
 273 #endif /* __CHARSET_H_ */