include/charset.h

   1 /* SPDX-License-Identifier: GPL-2.0+ */
   2 /*
   3  *  charset conversion utils
   4  *
   5  *  Copyright (c) 2017 Rob Clark
   6  */
   7
   8 #ifndef __CHARSET_H_
   9 #define __CHARSET_H_
  10
  11 #include <linux/kernel.h>
  12 #include <linux/types.h>
  13
  14 #define MAX_UTF8_PER_UTF16 3 /* max utf8 characters per utf16 character */
  15
  16 /**
  17  * utf8_get() - get next UTF-8 code point from buffer
  18  *
  19  * @src:                pointer to current byte, updated to point to next byte
  20  * Return:              code point, or 0 for end of string, or -1 if no legal
  21  *                      code point is found. In case of an error src points to
  22  *                      the incorrect byte.
  23  */
  24 s32 utf8_get(const char **src);
  25
  26 /**
  27  * utf8_put() - write UTF-8 code point to buffer
  28  *
  29  * @code:               code point
  30  * @dst:                pointer to destination buffer, updated to next position
  31  * Return:              -1 if the input parameters are invalid
  32  */
  33 int utf8_put(s32 code, char **dst);
  34
  35 /**
  36  * utf8_utf16_strnlen() - length of a truncated utf-8 string after conversion
  37  *                        to utf-16
  38  *
  39  * @src:                utf-8 string
  40  * @count:              maximum number of code points to convert
  41  * Return:              length in bytes after conversion to utf-16 without the
  42  *                      trailing \0. If an invalid UTF-8 sequence is hit one
  43  *                      word will be reserved for a replacement character.
  44  */
  45 size_t utf8_utf16_strnlen(const char *src, size_t count);
  46
  47 /**
  48  * utf8_utf16_strlen() - length of a utf-8 string after conversion to utf-16
  49  *
  50  * @a:                  utf-8 string
  51  * Return:              result of utf8_utf16_strnlen() with count SIZE_MAX,
  52  *                      see there for the handling of invalid sequences.
  53  */
  54 #define utf8_utf16_strlen(a) utf8_utf16_strnlen((a), SIZE_MAX)
  55
  56 /**
  57  * utf8_utf16_strncpy() - copy utf-8 string to utf-16 string
  58  *
  59  * @dst:                destination buffer
  60  * @src:                source buffer
  61  * @count:              maximum number of code points to copy
  62  * Return:              -1 if the input parameters are invalid
  63  */
  64 int utf8_utf16_strncpy(u16 **dst, const char *src, size_t count);
  65
  66 /**
  67  * utf8_utf16_strcpy() - copy utf-8 string to utf-16 string
  68  *
  69  * @d:                  destination buffer (u16 **, see utf8_utf16_strncpy())
  70  * @s:                  source buffer
  71  * Return:              -1 if the input parameters are invalid
  72  */
  73 #define utf8_utf16_strcpy(d, s) utf8_utf16_strncpy((d), (s), SIZE_MAX)
  74
  75 /**
  76  * utf16_get() - get next UTF-16 code point from buffer
  77  *
  78  * @src:                pointer to current word, updated to point to next word
  79  * Return:              code point, or 0 for end of string, or -1 if no legal
  80  *                      code point is found. In case of an error src points to
  81  *                      the incorrect word.
  82  */
  83 s32 utf16_get(const u16 **src);
  84
  85 /**
  86  * utf16_put() - write UTF-16 code point to buffer
  87  *
  88  * @code:               code point
  89  * @dst:                pointer to destination buffer, updated to next position
  90  * Return:              -1 if the input parameters are invalid
  91  */
  92 int utf16_put(s32 code, u16 **dst);
  93
  94 /**
  95  * utf16_strnlen() - length of a truncated utf-16 string
  96  *
  97  * @src:                utf-16 string
  98  * @count:              maximum number of code points to convert
  99  * Return:              length in code points. If an invalid UTF-16 sequence is
 100  *                      hit one position will be reserved for a replacement
 101  *                      character.
 102  */
 103 size_t utf16_strnlen(const u16 *src, size_t count);
 104
 105 /**
 106  * utf16_utf8_strnlen() - length of a truncated utf-16 string after conversion
 107  *                        to utf-8
 108  *
 109  * @src:                utf-16 string
 110  * @count:              maximum number of code points to convert
 111  * Return:              length in bytes after conversion to utf-8 without the
 112  *                      trailing \0. If an invalid UTF-16 sequence is hit one
 113  *                      byte will be reserved for a replacement character.
 114  */
 115 size_t utf16_utf8_strnlen(const u16 *src, size_t count);
 116
 117 /**
 118  * utf16_utf8_strlen() - length of a utf-16 string after conversion to utf-8
 119  *
 120  * @a:                  utf-16 string
 121  * Return:              result of utf16_utf8_strnlen() with count SIZE_MAX,
 122  *                      see there for the handling of invalid sequences.
 123  */
 124 #define utf16_utf8_strlen(a) utf16_utf8_strnlen((a), SIZE_MAX)
 125
 126 /**
 127  * utf16_utf8_strncpy() - copy utf-16 string to utf-8 string
 128  *
 129  * @dst:                destination buffer
 130  * @src:                source buffer
 131  * @count:              maximum number of code points to copy
 132  * Return:              -1 if the input parameters are invalid
 133  */
 134 int utf16_utf8_strncpy(char **dst, const u16 *src, size_t count);
 135
 136 /**
 137  * utf16_utf8_strcpy() - copy utf-16 string to utf-8 string
 138  *
 139  * @d:                  destination buffer (char **, see utf16_utf8_strncpy())
 140  * @s:                  source buffer
 141  * Return:              -1 if the input parameters are invalid
 142  */
 143 #define utf16_utf8_strcpy(d, s) utf16_utf8_strncpy((d), (s), SIZE_MAX)
 144
 145 /**
 146  * utf_to_lower() - convert a Unicode letter to lower case
 147  *
 148  * @code:               letter to convert
 149  * Return:              lower case letter or unchanged letter
 150  */
 151 s32 utf_to_lower(const s32 code);
 152
 153 /**
 154  * utf_to_upper() - convert a Unicode letter to upper case
 155  *
 156  * @code:               letter to convert
 157  * Return:              upper case letter or unchanged letter
 158  */
 159 s32 utf_to_upper(const s32 code);
 160
 161 /**
 162  * u16_strlen() - count non-zero words
 163  *
 164  * This function matches wcslen() if the -fshort-wchar compiler flag is set.
 165  * In the EFI context we explicitly need a function handling u16 strings.
 166  *
 167  * @in:                 null terminated u16 string
 168  * Return:              number of non-zero words.
 169  *                      This is not the number of utf-16 letters!
 170  */
 171 size_t u16_strlen(const u16 *in);
 172
 173 /**
 174  * u16_strnlen() - count non-zero words, up to a limit
 175  *
 176  * This function matches wcsnlen_s() if the -fshort-wchar compiler flag is set.
 177  * In the EFI context we explicitly need a function handling u16 strings.
 178  *
 179  * @in:                 null terminated u16 string
 180  * @count:              maximum number of words to count
 181  * Return:              number of non-zero words.
 182  *                      This is not the number of utf-16 letters!
 183  */
 184 size_t u16_strnlen(const u16 *in, size_t count);
 185
 186 /**
 187  * utf16_to_utf8() - Convert a utf16 string to utf8
 188  *
 189  * Converts 'size' characters of the utf16 string 'src' to utf8
 190  * written to the 'dest' buffer.
 191  *
 192  * NOTE that a single utf16 character can generate up to 3 utf8
 193  * characters.  See MAX_UTF8_PER_UTF16.
 194  *
 195  * @dest:               the destination buffer to write the utf8 characters
 196  * @src:                the source utf16 string
 197  * @size:               the number of utf16 characters to convert
 198  * Return:              the pointer to the first unwritten byte in 'dest'
 199  */
 200 uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size);
 201
 202 #endif /* __CHARSET_H_ */