uboot/lib/charset.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0+
   2/*
   3 *  charset conversion utils
   4 *
   5 *  Copyright (c) 2017 Rob Clark
   6 */
   7
   8#include <common.h>
   9#include <charset.h>
  10#include <capitalization.h>
  11#include <cp437.h>
  12#include <efi_loader.h>
  13#include <errno.h>
  14#include <malloc.h>
  15
  16/**
  17 * codepage_437 - Unicode to codepage 437 translation table
  18 */
  19const u16 codepage_437[128] = CP437;
  20
  21static struct capitalization_table capitalization_table[] =
  22#ifdef CONFIG_EFI_UNICODE_CAPITALIZATION
  23        UNICODE_CAPITALIZATION_TABLE;
  24#elif CONFIG_FAT_DEFAULT_CODEPAGE == 1250
  25        CP1250_CAPITALIZATION_TABLE;
  26#else
  27        CP437_CAPITALIZATION_TABLE;
  28#endif
  29
  30/**
  31 * get_code() - read Unicode code point from UTF-8 stream
  32 *
  33 * @read_u8:    - stream reader
  34 * @src:        - string buffer passed to stream reader, optional
  35 * Return:      - Unicode code point, or -1
  36 */
  37static int get_code(u8 (*read_u8)(void *data), void *data)
  38{
  39        s32 ch = 0;
  40
  41        ch = read_u8(data);
  42        if (!ch)
  43                return 0;
  44        if (ch >= 0xc2 && ch <= 0xf4) {
  45                int code = 0;
  46
  47                if (ch >= 0xe0) {
  48                        if (ch >= 0xf0) {
  49                                /* 0xf0 - 0xf4 */
  50                                ch &= 0x07;
  51                                code = ch << 18;
  52                                ch = read_u8(data);
  53                                if (ch < 0x80 || ch > 0xbf)
  54                                        goto error;
  55                                ch &= 0x3f;
  56                        } else {
  57                                /* 0xe0 - 0xef */
  58                                ch &= 0x0f;
  59                        }
  60                        code += ch << 12;
  61                        if ((code >= 0xD800 && code <= 0xDFFF) ||
  62                            code >= 0x110000)
  63                                goto error;
  64                        ch = read_u8(data);
  65                        if (ch < 0x80 || ch > 0xbf)
  66                                goto error;
  67                }
  68                /* 0xc0 - 0xdf or continuation byte (0x80 - 0xbf) */
  69                ch &= 0x3f;
  70                code += ch << 6;
  71                ch = read_u8(data);
  72                if (ch < 0x80 || ch > 0xbf)
  73                        goto error;
  74                ch &= 0x3f;
  75                ch += code;
  76        } else if (ch >= 0x80) {
  77                goto error;
  78        }
  79        return ch;
  80error:
  81        return -1;
  82}
  83
  84/**
  85 * read_string() - read byte from character string
  86 *
  87 * @data:       - pointer to string
  88 * Return:      - byte read
  89 *
  90 * The string pointer is incremented if it does not point to '\0'.
  91 */
  92static u8 read_string(void *data)
  93
  94{
  95        const char **src = (const char **)data;
  96        u8 c;
  97
  98        if (!src || !*src || !**src)
  99                return 0;
 100        c = **src;
 101        ++*src;
 102        return c;
 103}
 104
 105/**
 106 * read_console() - read byte from console
 107 *
 108 * @data        - not used, needed to match interface
 109 * Return:      - byte read or 0 on error
 110 */
 111static u8 read_console(void *data)
 112{
 113        int ch;
 114
 115        ch = getchar();
 116        if (ch < 0)
 117                ch = 0;
 118        return ch;
 119}
 120
 121int console_read_unicode(s32 *code)
 122{
 123        for (;;) {
 124                s32 c;
 125
 126                if (!tstc()) {
 127                        /* No input available */
 128                        return 1;
 129                }
 130
 131                /* Read Unicode code */
 132                c = get_code(read_console, NULL);
 133                if (c > 0) {
 134                        *code = c;
 135                        return 0;
 136                }
 137        }
 138}
 139
 140s32 utf8_get(const char **src)
 141{
 142        return get_code(read_string, src);
 143}
 144
 145int utf8_put(s32 code, char **dst)
 146{
 147        if (!dst || !*dst)
 148                return -1;
 149        if ((code >= 0xD800 && code <= 0xDFFF) || code >= 0x110000)
 150                return -1;
 151        if (code <= 0x007F) {
 152                **dst = code;
 153        } else {
 154                if (code <= 0x07FF) {
 155                        **dst = code >> 6 | 0xC0;
 156                } else {
 157                        if (code < 0x10000) {
 158                                **dst = code >> 12 | 0xE0;
 159                        } else {
 160                                **dst = code >> 18 | 0xF0;
 161                                ++*dst;
 162                                **dst = (code >> 12 & 0x3F) | 0x80;
 163                        }
 164                        ++*dst;
 165                        **dst = (code >> 6 & 0x3F) | 0x80;
 166                }
 167                ++*dst;
 168                **dst = (code & 0x3F) | 0x80;
 169        }
 170        ++*dst;
 171        return 0;
 172}
 173
 174size_t utf8_utf16_strnlen(const char *src, size_t count)
 175{
 176        size_t len = 0;
 177
 178        for (; *src && count; --count)  {
 179                s32 code = utf8_get(&src);
 180
 181                if (!code)
 182                        break;
 183                if (code < 0) {
 184                        /* Reserve space for a replacement character */
 185                        len += 1;
 186                } else if (code < 0x10000) {
 187                        len += 1;
 188                } else {
 189                        len += 2;
 190                }
 191        }
 192        return len;
 193}
 194
 195int utf8_utf16_strncpy(u16 **dst, const char *src, size_t count)
 196{
 197        if (!src || !dst || !*dst)
 198                return -1;
 199
 200        for (; count && *src; --count) {
 201                s32 code = utf8_get(&src);
 202
 203                if (code < 0)
 204                        code = '?';
 205                utf16_put(code, dst);
 206        }
 207        **dst = 0;
 208        return 0;
 209}
 210
 211s32 utf16_get(const u16 **src)
 212{
 213        s32 code, code2;
 214
 215        if (!src || !*src)
 216                return -1;
 217        if (!**src)
 218                return 0;
 219        code = **src;
 220        ++*src;
 221        if (code >= 0xDC00 && code <= 0xDFFF)
 222                return -1;
 223        if (code >= 0xD800 && code <= 0xDBFF) {
 224                if (!**src)
 225                        return -1;
 226                code &= 0x3ff;
 227                code <<= 10;
 228                code += 0x10000;
 229                code2 = **src;
 230                ++*src;
 231                if (code2 <= 0xDC00 || code2 >= 0xDFFF)
 232                        return -1;
 233                code2 &= 0x3ff;
 234                code += code2;
 235        }
 236        return code;
 237}
 238
 239int utf16_put(s32 code, u16 **dst)
 240{
 241        if (!dst || !*dst)
 242                return -1;
 243        if ((code >= 0xD800 && code <= 0xDFFF) || code >= 0x110000)
 244                return -1;
 245        if (code < 0x10000) {
 246                **dst = code;
 247        } else {
 248                code -= 0x10000;
 249                **dst = code >> 10 | 0xD800;
 250                ++*dst;
 251                **dst = (code & 0x3ff) | 0xDC00;
 252        }
 253        ++*dst;
 254        return 0;
 255}
 256
 257size_t utf16_strnlen(const u16 *src, size_t count)
 258{
 259        size_t len = 0;
 260
 261        for (; *src && count; --count)  {
 262                s32 code = utf16_get(&src);
 263
 264                if (!code)
 265                        break;
 266                /*
 267                 * In case of an illegal sequence still reserve space for a
 268                 * replacement character.
 269                 */
 270                ++len;
 271        }
 272        return len;
 273}
 274
 275size_t utf16_utf8_strnlen(const u16 *src, size_t count)
 276{
 277        size_t len = 0;
 278
 279        for (; *src && count; --count)  {
 280                s32 code = utf16_get(&src);
 281
 282                if (!code)
 283                        break;
 284                if (code < 0)
 285                        /* Reserve space for a replacement character */
 286                        len += 1;
 287                else if (code < 0x80)
 288                        len += 1;
 289                else if (code < 0x800)
 290                        len += 2;
 291                else if (code < 0x10000)
 292                        len += 3;
 293                else
 294                        len += 4;
 295        }
 296        return len;
 297}
 298
 299int utf16_utf8_strncpy(char **dst, const u16 *src, size_t count)
 300{
 301        if (!src || !dst || !*dst)
 302                return -1;
 303
 304        for (; count && *src; --count) {
 305                s32 code = utf16_get(&src);
 306
 307                if (code < 0)
 308                        code = '?';
 309                utf8_put(code, dst);
 310        }
 311        **dst = 0;
 312        return 0;
 313}
 314
 315s32 utf_to_lower(const s32 code)
 316{
 317        struct capitalization_table *pos = capitalization_table;
 318        s32 ret = code;
 319
 320        if (code <= 0x7f) {
 321                if (code >= 'A' && code <= 'Z')
 322                        ret += 0x20;
 323                return ret;
 324        }
 325        for (; pos->upper; ++pos) {
 326                if (pos->upper == code) {
 327                        ret = pos->lower;
 328                        break;
 329                }
 330        }
 331        return ret;
 332}
 333
 334s32 utf_to_upper(const s32 code)
 335{
 336        struct capitalization_table *pos = capitalization_table;
 337        s32 ret = code;
 338
 339        if (code <= 0x7f) {
 340                if (code >= 'a' && code <= 'z')
 341                        ret -= 0x20;
 342                return ret;
 343        }
 344        for (; pos->lower; ++pos) {
 345                if (pos->lower == code) {
 346                        ret = pos->upper;
 347                        break;
 348                }
 349        }
 350        return ret;
 351}
 352
 353/*
 354 * u16_strncmp() - compare two u16 string
 355 *
 356 * @s1:         first string to compare
 357 * @s2:         second string to compare
 358 * @n:          maximum number of u16 to compare
 359 * Return:      0  if the first n u16 are the same in s1 and s2
 360 *              < 0 if the first different u16 in s1 is less than the
 361 *              corresponding u16 in s2
 362 *              > 0 if the first different u16 in s1 is greater than the
 363 *              corresponding u16 in s2
 364 */
 365int u16_strncmp(const u16 *s1, const u16 *s2, size_t n)
 366{
 367        int ret = 0;
 368
 369        for (; n; --n, ++s1, ++s2) {
 370                ret = *s1 - *s2;
 371                if (ret || !*s1)
 372                        break;
 373        }
 374
 375        return ret;
 376}
 377
 378size_t u16_strlen(const void *in)
 379{
 380        const char *pos = in;
 381        size_t ret;
 382
 383        for (; pos[0] || pos[1]; pos += 2)
 384                ;
 385        ret = pos - (char *)in;
 386        ret >>= 1;
 387        return ret;
 388}
 389
 390size_t __efi_runtime u16_strnlen(const u16 *in, size_t count)
 391{
 392        size_t i;
 393        for (i = 0; count-- && in[i]; i++);
 394        return i;
 395}
 396
 397size_t u16_strsize(const void *in)
 398{
 399        return (u16_strlen(in) + 1) * sizeof(u16);
 400}
 401
 402u16 *u16_strcpy(u16 *dest, const u16 *src)
 403{
 404        u16 *tmp = dest;
 405
 406        for (;; dest++, src++) {
 407                *dest = *src;
 408                if (!*src)
 409                        break;
 410        }
 411
 412        return tmp;
 413}
 414
 415u16 *u16_strdup(const void *src)
 416{
 417        u16 *new;
 418        size_t len;
 419
 420        if (!src)
 421                return NULL;
 422        len = (u16_strlen(src) + 1) * sizeof(u16);
 423        new = malloc(len);
 424        if (!new)
 425                return NULL;
 426        memcpy(new, src, len);
 427
 428        return new;
 429}
 430
 431/* Convert UTF-16 to UTF-8.  */
 432uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size)
 433{
 434        uint32_t code_high = 0;
 435
 436        while (size--) {
 437                uint32_t code = *src++;
 438
 439                if (code_high) {
 440                        if (code >= 0xDC00 && code <= 0xDFFF) {
 441                                /* Surrogate pair.  */
 442                                code = ((code_high - 0xD800) << 10) + (code - 0xDC00) + 0x10000;
 443
 444                                *dest++ = (code >> 18) | 0xF0;
 445                                *dest++ = ((code >> 12) & 0x3F) | 0x80;
 446                                *dest++ = ((code >> 6) & 0x3F) | 0x80;
 447                                *dest++ = (code & 0x3F) | 0x80;
 448                        } else {
 449                                /* Error...  */
 450                                *dest++ = '?';
 451                                /* *src may be valid. Don't eat it.  */
 452                                src--;
 453                        }
 454
 455                        code_high = 0;
 456                } else {
 457                        if (code <= 0x007F) {
 458                                *dest++ = code;
 459                        } else if (code <= 0x07FF) {
 460                                *dest++ = (code >> 6) | 0xC0;
 461                                *dest++ = (code & 0x3F) | 0x80;
 462                        } else if (code >= 0xD800 && code <= 0xDBFF) {
 463                                code_high = code;
 464                                continue;
 465                        } else if (code >= 0xDC00 && code <= 0xDFFF) {
 466                                /* Error... */
 467                                *dest++ = '?';
 468                        } else if (code < 0x10000) {
 469                                *dest++ = (code >> 12) | 0xE0;
 470                                *dest++ = ((code >> 6) & 0x3F) | 0x80;
 471                                *dest++ = (code & 0x3F) | 0x80;
 472                        } else {
 473                                *dest++ = (code >> 18) | 0xF0;
 474                                *dest++ = ((code >> 12) & 0x3F) | 0x80;
 475                                *dest++ = ((code >> 6) & 0x3F) | 0x80;
 476                                *dest++ = (code & 0x3F) | 0x80;
 477                        }
 478                }
 479        }
 480
 481        return dest;
 482}
 483
 484int utf_to_cp(s32 *c, const u16 *codepage)
 485{
 486        if (*c >= 0x80) {
 487                int j;
 488
 489                /* Look up codepage translation */
 490                for (j = 0; j < 0x80; ++j) {
 491                        if (*c == codepage[j]) {
 492                                *c = j + 0x80;
 493                                return 0;
 494                        }
 495                }
 496                *c = '?';
 497                return -ENOENT;
 498        }
 499        return 0;
 500}
 501
 502int utf8_to_cp437_stream(u8 c, char *buffer)
 503{
 504        char *end;
 505        const char *pos;
 506        s32 s;
 507        int ret;
 508
 509        for (;;) {
 510                pos = buffer;
 511                end = buffer + strlen(buffer);
 512                *end++ = c;
 513                *end = 0;
 514                s = utf8_get(&pos);
 515                if (s > 0) {
 516                        *buffer = 0;
 517                        ret = utf_to_cp(&s, codepage_437);
 518                        return s;
 519                        }
 520                if (pos == end)
 521                        return 0;
 522                *buffer = 0;
 523        }
 524}
 525
 526int utf8_to_utf32_stream(u8 c, char *buffer)
 527{
 528        char *end;
 529        const char *pos;
 530        s32 s;
 531
 532        for (;;) {
 533                pos = buffer;
 534                end = buffer + strlen(buffer);
 535                *end++ = c;
 536                *end = 0;
 537                s = utf8_get(&pos);
 538                if (s > 0) {
 539                        *buffer = 0;
 540                        return s;
 541                }
 542                if (pos == end)
 543                        return 0;
 544                *buffer = 0;
 545        }
 546}
 547