1/* SPDX-License-Identifier: GPL-2.0+ */ 2/* 3 * charset conversion utils 4 * 5 * Copyright (c) 2017 Rob Clark 6 */ 7 8#ifndef __CHARSET_H_ 9#define __CHARSET_H_ 10 11#include <linux/kernel.h> 12#include <linux/types.h> 13 14#define MAX_UTF8_PER_UTF16 3 15 16/* 17 * codepage_437 - Unicode to codepage 437 translation table 18 */ 19extern const u16 codepage_437[128]; 20 21/** 22 * console_read_unicode() - read Unicode code point from console 23 * 24 * @code: pointer to store Unicode code point 25 * Return: 0 = success 26 */ 27int console_read_unicode(s32 *code); 28 29/** 30 * utf8_get() - get next UTF-8 code point from buffer 31 * 32 * @src: pointer to current byte, updated to point to next byte 33 * Return: code point, or 0 for end of string, or -1 if no legal 34 * code point is found. In case of an error src points to 35 * the incorrect byte. 36 */ 37s32 utf8_get(const char **src); 38 39/** 40 * utf8_put() - write UTF-8 code point to buffer 41 * 42 * @code: code point 43 * @dst: pointer to destination buffer, updated to next position 44 * Return: -1 if the input parameters are invalid 45 */ 46int utf8_put(s32 code, char **dst); 47 48/** 49 * utf8_utf16_strnlen() - length of a truncated utf-8 string after conversion 50 * to utf-16 51 * 52 * @src: utf-8 string 53 * @count: maximum number of code points to convert 54 * Return: length in u16 after conversion to utf-16 without the 55 * trailing \0. If an invalid UTF-8 sequence is hit one 56 * u16 will be reserved for a replacement character. 57 */ 58size_t utf8_utf16_strnlen(const char *src, size_t count); 59 60/** 61 * utf8_utf16_strlen() - length of a utf-8 string after conversion to utf-16 62 * 63 * @a: utf-8 string 64 * Return: length in u16 after conversion to utf-16 without the 65 * trailing \0. If an invalid UTF-8 sequence is hit one 66 * u16 will be reserved for a replacement character. 67 */ 68#define utf8_utf16_strlen(a) utf8_utf16_strnlen((a), SIZE_MAX) 69 70/** 71 * utf8_utf16_strncpy() - copy utf-8 string to utf-16 string 72 * 73 * @dst: destination buffer 74 * @src: source buffer 75 * @count: maximum number of code points to copy 76 * Return: -1 if the input parameters are invalid 77 */ 78int utf8_utf16_strncpy(u16 **dst, const char *src, size_t count); 79 80/** 81 * utf8_utf16_strcpy() - copy utf-8 string to utf-16 string 82 * 83 * @d: destination buffer 84 * @s: source buffer 85 * Return: -1 if the input parameters are invalid 86 */ 87#define utf8_utf16_strcpy(d, s) utf8_utf16_strncpy((d), (s), SIZE_MAX) 88 89/** 90 * utf16_get() - get next UTF-16 code point from buffer 91 * 92 * @src: pointer to current word, updated to point to next word 93 * Return: code point, or 0 for end of string, or -1 if no legal 94 * code point is found. In case of an error src points to 95 * the incorrect word. 96 */ 97s32 utf16_get(const u16 **src); 98 99/** 100 * utf16_put() - write UTF-16 code point to buffer 101 * 102 * @code: code point 103 * @dst: pointer to destination buffer, updated to next position 104 * Return: -1 if the input parameters are invalid 105 */ 106int utf16_put(s32 code, u16 **dst); 107 108/** 109 * utf16_strnlen() - length of a truncated utf-16 string 110 * 111 * @src: utf-16 string 112 * @count: maximum number of code points to convert 113 * Return: length in code points. If an invalid UTF-16 sequence is 114 * hit one position will be reserved for a replacement 115 * character. 116 */ 117size_t utf16_strnlen(const u16 *src, size_t count); 118 119/** 120 * utf16_utf8_strnlen() - length of a truncated utf-16 string after conversion 121 * to utf-8 122 * 123 * @src: utf-16 string 124 * @count: maximum number of code points to convert 125 * Return: length in bytes after conversion to utf-8 without the 126 * trailing \0. If an invalid UTF-16 sequence is hit one 127 * byte will be reserved for a replacement character. 128 */ 129size_t utf16_utf8_strnlen(const u16 *src, size_t count); 130 131/** 132 * utf16_utf8_strlen() - length of a utf-16 string after conversion to utf-8 133 * 134 * @a: utf-16 string 135 * Return: length in bytes after conversion to utf-8 without the 136 * trailing \0. If an invalid UTF-16 sequence is hit one 137 * byte will be reserved for a replacement character. 138 */ 139#define utf16_utf8_strlen(a) utf16_utf8_strnlen((a), SIZE_MAX) 140 141/** 142 * utf16_utf8_strncpy() - copy utf-16 string to utf-8 string 143 * 144 * @dst: destination buffer 145 * @src: source buffer 146 * @count: maximum number of code points to copy 147 * Return: -1 if the input parameters are invalid 148 */ 149int utf16_utf8_strncpy(char **dst, const u16 *src, size_t count); 150 151/** 152 * utf16_utf8_strcpy() - copy utf-16 string to utf-8 string 153 * 154 * @d: destination buffer 155 * @s: source buffer 156 * Return: -1 if the input parameters are invalid 157 */ 158#define utf16_utf8_strcpy(d, s) utf16_utf8_strncpy((d), (s), SIZE_MAX) 159 160/** 161 * utf_to_lower() - convert a Unicode letter to lower case 162 * 163 * @code: letter to convert 164 * Return: lower case letter or unchanged letter 165 */ 166s32 utf_to_lower(const s32 code); 167 168/** 169 * utf_to_upper() - convert a Unicode letter to upper case 170 * 171 * @code: letter to convert 172 * Return: upper case letter or unchanged letter 173 */ 174s32 utf_to_upper(const s32 code); 175 176/** 177 * u16_strcasecmp() - compare two u16 strings case insensitively 178 * 179 * @s1: first string to compare 180 * @s2: second string to compare 181 * Return: 0 if the first n u16 are the same in s1 and s2 182 * < 0 if the first different u16 in s1 is less than the 183 * corresponding u16 in s2 184 * > 0 if the first different u16 in s1 is greater than the 185 */ 186int u16_strcasecmp(const u16 *s1, const u16 *s2); 187 188/** 189 * u16_strncmp() - compare two u16 string 190 * 191 * @s1: first string to compare 192 * @s2: second string to compare 193 * @n: maximum number of u16 to compare 194 * Return: 0 if the first n u16 are the same in s1 and s2 195 * < 0 if the first different u16 in s1 is less than the 196 * corresponding u16 in s2 197 * > 0 if the first different u16 in s1 is greater than the 198 * corresponding u16 in s2 199 */ 200int u16_strncmp(const u16 *s1, const u16 *s2, size_t n); 201 202/** 203 * u16_strcmp() - compare two u16 string 204 * 205 * @s1: first string to compare 206 * @s2: second string to compare 207 * Return: 0 if the first n u16 are the same in s1 and s2 208 * < 0 if the first different u16 in s1 is less than the 209 * corresponding u16 in s2 210 * > 0 if the first different u16 in s1 is greater than the 211 * corresponding u16 in s2 212 */ 213#define u16_strcmp(s1, s2) u16_strncmp((s1), (s2), SIZE_MAX) 214 215/** 216 * u16_strsize() - count size of u16 string in bytes including the null 217 * character 218 * 219 * Counts the number of bytes occupied by a u16 string 220 * 221 * @in: null terminated u16 string 222 * Return: bytes in a u16 string 223 */ 224size_t u16_strsize(const void *in); 225 226/** 227 * u16_strnlen() - count non-zero words 228 * 229 * This function matches wscnlen_s() if the -fshort-wchar compiler flag is set. 230 * In the EFI context we explicitly need a function handling u16 strings. 231 * 232 * @in: null terminated u16 string 233 * @count: maximum number of words to count 234 * Return: number of non-zero words. 235 * This is not the number of utf-16 letters! 236 */ 237size_t u16_strnlen(const u16 *in, size_t count); 238 239/** 240 * u16_strlen - count non-zero words 241 * 242 * This function matches wsclen() if the -fshort-wchar compiler flag is set. 243 * In the EFI context we explicitly need a function handling u16 strings. 244 * 245 * @in: null terminated u16 string 246 * Return: number of non-zero words. 247 * This is not the number of utf-16 letters! 248 */ 249size_t u16_strlen(const void *in); 250 251#define u16_strlen(in) u16_strnlen(in, SIZE_MAX) 252 253/** 254 * u16_strcpy() - copy u16 string 255 * 256 * Copy u16 string pointed to by src, including terminating null word, to 257 * the buffer pointed to by dest. 258 * 259 * @dest: destination buffer 260 * @src: source buffer (null terminated) 261 * Return: 'dest' address 262 */ 263u16 *u16_strcpy(u16 *dest, const u16 *src); 264 265/** 266 * u16_strdup() - duplicate u16 string 267 * 268 * Copy u16 string pointed to by src, including terminating null word, to a 269 * newly allocated buffer. 270 * 271 * @src: source buffer (null terminated) 272 * Return: allocated new buffer on success, NULL on failure 273 */ 274u16 *u16_strdup(const void *src); 275 276/** 277 * u16_strlcat() - Append a length-limited, %NUL-terminated string to another 278 * 279 * Append the source string @src to the destination string @dest, overwriting 280 * null word at the end of @dest adding a terminating null word. 281 * 282 * @dest: zero terminated u16 destination string 283 * @src: zero terminated u16 source string 284 * @count: size of buffer in u16 words including taling 0x0000 285 * Return: required size including trailing 0x0000 in u16 words 286 * If return value >= count, truncation occurred. 287 */ 288size_t u16_strlcat(u16 *dest, const u16 *src, size_t count); 289 290/** 291 * utf16_to_utf8() - Convert an utf16 string to utf8 292 * 293 * Converts 'size' characters of the utf16 string 'src' to utf8 294 * written to the 'dest' buffer. 295 * 296 * NOTE that a single utf16 character can generate up to 3 utf8 297 * characters. See MAX_UTF8_PER_UTF16. 298 * 299 * @dest: the destination buffer to write the utf8 characters 300 * @src: the source utf16 string 301 * @size: the number of utf16 characters to convert 302 * Return: the pointer to the first unwritten byte in 'dest' 303 */ 304uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size); 305 306/** 307 * utf_to_cp() - translate Unicode code point to 8bit codepage 308 * 309 * Codepoints that do not exist in the codepage are rendered as question mark. 310 * 311 * @c: pointer to Unicode code point to be translated 312 * @codepage: Unicode to codepage translation table 313 * Return: 0 on success, -ENOENT if codepoint cannot be translated 314 */ 315int utf_to_cp(s32 *c, const u16 *codepage); 316 317/** 318 * utf8_to_cp437_stream() - convert UTF-8 stream to codepage 437 319 * 320 * @c: next UTF-8 character to convert 321 * @buffer: buffer, at least 5 characters 322 * Return: next codepage 437 character or 0 323 */ 324int utf8_to_cp437_stream(u8 c, char *buffer); 325 326/** 327 * utf8_to_utf32_stream() - convert UTF-8 stream to UTF-32 328 * 329 * @c: next UTF-8 character to convert 330 * @buffer: buffer, at least 5 characters 331 * Return: next codepage 437 character or 0 332 */ 333int utf8_to_utf32_stream(u8 c, char *buffer); 334 335#endif /* __CHARSET_H_ */ 336