uboot/lib/efi_loader/efi_unicode_collation.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0+
   2/*
   3 * EFI Unicode collation protocol
   4 *
   5 * Copyright (c) 2018 Heinrich Schuchardt <xypron.glpk@gmx.de>
   6 */
   7
   8#include <common.h>
   9#include <charset.h>
  10#include <cp1250.h>
  11#include <cp437.h>
  12#include <efi_loader.h>
  13
  14/* Characters that may not be used in FAT 8.3 file names */
  15static const char illegal[] = "+,<=>:;\"/\\|?*[]\x7f";
  16
  17/*
  18 * EDK2 assumes codepage 1250 when creating FAT 8.3 file names.
  19 * Linux defaults to codepage 437 for FAT 8.3 file names.
  20 */
  21#if CONFIG_FAT_DEFAULT_CODEPAGE == 1250
  22/* Unicode code points for code page 1250 characters 0x80 - 0xff */
  23static const u16 codepage[] = CP1250;
  24#else
  25/* Unicode code points for code page 437 characters 0x80 - 0xff */
  26static const u16 *codepage = codepage_437;
  27#endif
  28
  29/* GUID of the EFI_UNICODE_COLLATION_PROTOCOL2 */
  30const efi_guid_t efi_guid_unicode_collation_protocol2 =
  31        EFI_UNICODE_COLLATION_PROTOCOL2_GUID;
  32
  33/**
  34 * efi_stri_coll() - compare utf-16 strings case-insenitively
  35 *
  36 * @this:       unicode collation protocol instance
  37 * @s1:         first string
  38 * @s2:         second string
  39 *
  40 * This function implements the StriColl() service of the
  41 * EFI_UNICODE_COLLATION_PROTOCOL2.
  42 *
  43 * See the Unified Extensible Firmware Interface (UEFI) specification for
  44 * details.
  45 *
  46 * Return:      0: s1 == s2, > 0: s1 > s2, < 0: s1 < s2
  47 */
  48static efi_intn_t EFIAPI efi_stri_coll(
  49                struct efi_unicode_collation_protocol *this, u16 *s1, u16 *s2)
  50{
  51        s32 c1, c2;
  52        efi_intn_t ret = 0;
  53
  54        EFI_ENTRY("%p, %ls, %ls", this, s1, s2);
  55        for (; *s1 | *s2; ++s1, ++s2) {
  56                c1 = utf_to_upper(*s1);
  57                c2 = utf_to_upper(*s2);
  58                if (c1 < c2) {
  59                        ret = -1;
  60                        goto out;
  61                } else if (c1 > c2) {
  62                        ret = 1;
  63                        goto out;
  64                }
  65        }
  66out:
  67        EFI_EXIT(EFI_SUCCESS);
  68        return ret;
  69}
  70
  71/**
  72 * next_lower() - get next codepoint converted to lower case
  73 *
  74 * @string:     pointer to u16 string, on return advanced by one codepoint
  75 * Return:      first codepoint of string converted to lower case
  76 */
  77static s32 next_lower(const u16 **string)
  78{
  79        return utf_to_lower(utf16_get(string));
  80}
  81
  82/**
  83 * metai_match() - compare utf-16 string with a pattern string case-insenitively
  84 *
  85 * @string:     string to compare
  86 * @pattern:    pattern string
  87 *
  88 * The pattern string may use these:
  89 *      - * matches >= 0 characters
  90 *      - ? matches 1 character
  91 *      - [<char1><char2>...<charN>] match any character in the set
  92 *      - [<char1>-<char2>] matches any character in the range
  93 *
  94 * This function is called my efi_metai_match().
  95 *
  96 * For '*' pattern searches this function calls itself recursively.
  97 * Performance-wise this is suboptimal, especially for multiple '*' wildcards.
  98 * But it results in simple code.
  99 *
 100 * Return:      true if the string is matched.
 101 */
 102static bool metai_match(const u16 *string, const u16 *pattern)
 103{
 104        s32 first, s, p;
 105
 106        for (; *string && *pattern;) {
 107                const u16 *string_old = string;
 108
 109                s = next_lower(&string);
 110                p = next_lower(&pattern);
 111
 112                switch (p) {
 113                case '*':
 114                        /* Match 0 or more characters */
 115                        for (;; s = next_lower(&string)) {
 116                                if (metai_match(string_old, pattern))
 117                                        return true;
 118                                if (!s)
 119                                        return false;
 120                                string_old = string;
 121                        }
 122                case '?':
 123                        /* Match any one character */
 124                        break;
 125                case '[':
 126                        /* Match any character in the set */
 127                        p = next_lower(&pattern);
 128                        first = p;
 129                        if (first == ']')
 130                                /* Empty set */
 131                                return false;
 132                        p = next_lower(&pattern);
 133                        if (p == '-') {
 134                                /* Range */
 135                                p = next_lower(&pattern);
 136                                if (s < first || s > p)
 137                                        return false;
 138                                p = next_lower(&pattern);
 139                                if (p != ']')
 140                                        return false;
 141                        } else {
 142                                /* Set */
 143                                bool hit = false;
 144
 145                                if (s == first)
 146                                        hit = true;
 147                                for (; p && p != ']';
 148                                     p = next_lower(&pattern)) {
 149                                        if (p == s)
 150                                                hit = true;
 151                                }
 152                                if (!hit || p != ']')
 153                                        return false;
 154                        }
 155                        break;
 156                default:
 157                        /* Match one character */
 158                        if (p != s)
 159                                return false;
 160                }
 161        }
 162        if (!*pattern && !*string)
 163                return true;
 164        return false;
 165}
 166
 167/**
 168 * efi_metai_match() - compare utf-16 string with a pattern string
 169 *                     case-insenitively
 170 *
 171 * @this:       unicode collation protocol instance
 172 * @string:     string to compare
 173 * @pattern:    pattern string
 174 *
 175 * The pattern string may use these:
 176 *      - * matches >= 0 characters
 177 *      - ? matches 1 character
 178 *      - [<char1><char2>...<charN>] match any character in the set
 179 *      - [<char1>-<char2>] matches any character in the range
 180 *
 181 * This function implements the MetaMatch() service of the
 182 * EFI_UNICODE_COLLATION_PROTOCOL2.
 183 *
 184 * Return:      true if the string is matched.
 185 */
 186static bool EFIAPI efi_metai_match(struct efi_unicode_collation_protocol *this,
 187                                   const u16 *string, const u16 *pattern)
 188{
 189        bool ret;
 190
 191        EFI_ENTRY("%p, %ls, %ls", this, string, pattern);
 192        ret =  metai_match(string, pattern);
 193        EFI_EXIT(EFI_SUCCESS);
 194        return ret;
 195}
 196
 197/**
 198 * efi_str_lwr() - convert to lower case
 199 *
 200 * @this:       unicode collation protocol instance
 201 * @string:     string to convert
 202 *
 203 * The conversion is done in place. As long as upper and lower letters use the
 204 * same number of words this does not pose a problem.
 205 *
 206 * This function implements the StrLwr() service of the
 207 * EFI_UNICODE_COLLATION_PROTOCOL2.
 208 */
 209static void EFIAPI efi_str_lwr(struct efi_unicode_collation_protocol *this,
 210                               u16 *string)
 211{
 212        EFI_ENTRY("%p, %ls", this, string);
 213        for (; *string; ++string)
 214                *string = utf_to_lower(*string);
 215        EFI_EXIT(EFI_SUCCESS);
 216}
 217
 218/**
 219 * efi_str_upr() - convert to upper case
 220 *
 221 * @this:       unicode collation protocol instance
 222 * @string:     string to convert
 223 *
 224 * The conversion is done in place. As long as upper and lower letters use the
 225 * same number of words this does not pose a problem.
 226 *
 227 * This function implements the StrUpr() service of the
 228 * EFI_UNICODE_COLLATION_PROTOCOL2.
 229 */
 230static void EFIAPI efi_str_upr(struct efi_unicode_collation_protocol *this,
 231                               u16 *string)
 232{
 233        EFI_ENTRY("%p, %ls", this, string);
 234        for (; *string; ++string)
 235                *string = utf_to_upper(*string);
 236        EFI_EXIT(EFI_SUCCESS);
 237}
 238
 239/**
 240 * efi_fat_to_str() - convert an 8.3 file name from an OEM codepage to Unicode
 241 *
 242 * @this:       unicode collation protocol instance
 243 * @fat_size:   size of the string to convert
 244 * @fat:        string to convert
 245 * @string:     converted string
 246 *
 247 * This function implements the FatToStr() service of the
 248 * EFI_UNICODE_COLLATION_PROTOCOL2.
 249 */
 250static void EFIAPI efi_fat_to_str(struct efi_unicode_collation_protocol *this,
 251                                  efi_uintn_t fat_size, char *fat, u16 *string)
 252{
 253        efi_uintn_t i;
 254        u16 c;
 255
 256        EFI_ENTRY("%p, %zu, %s, %p", this, fat_size, fat, string);
 257        for (i = 0; i < fat_size; ++i) {
 258                c = (unsigned char)fat[i];
 259                if (c > 0x80)
 260                        c = codepage[i - 0x80];
 261                string[i] = c;
 262                if (!c)
 263                        break;
 264        }
 265        string[i] = 0;
 266        EFI_EXIT(EFI_SUCCESS);
 267}
 268
 269/**
 270 * efi_fat_to_str() - convert a utf-16 string to legal characters for a FAT
 271 *                    file name in an OEM code page
 272 *
 273 * @this:       unicode collation protocol instance
 274 * @string:     Unicode string to convert
 275 * @fat_size:   size of the target buffer
 276 * @fat:        converted string
 277 *
 278 * This function implements the StrToFat() service of the
 279 * EFI_UNICODE_COLLATION_PROTOCOL2.
 280 *
 281 * Return:      true if an illegal character was substituted by '_'.
 282 */
 283static bool EFIAPI efi_str_to_fat(struct efi_unicode_collation_protocol *this,
 284                                  const u16 *string, efi_uintn_t fat_size,
 285                                  char *fat)
 286{
 287        efi_uintn_t i;
 288        s32 c;
 289        bool ret = false;
 290
 291        EFI_ENTRY("%p, %ls, %zu, %p", this, string, fat_size, fat);
 292        for (i = 0; i < fat_size;) {
 293                c = utf16_get(&string);
 294                switch (c) {
 295                /* Ignore period and space */
 296                case '.':
 297                case ' ':
 298                        continue;
 299                case 0:
 300                        break;
 301                }
 302                c = utf_to_upper(c);
 303                if (utf_to_cp(&c, codepage) ||
 304                    (c && (c < 0x20 || strchr(illegal, c)))) {
 305                        ret = true;
 306                        c = '_';
 307                }
 308
 309                fat[i] = c;
 310                if (!c)
 311                        break;
 312                ++i;
 313        }
 314        EFI_EXIT(EFI_SUCCESS);
 315        return ret;
 316}
 317
 318const struct efi_unicode_collation_protocol efi_unicode_collation_protocol2 = {
 319        .stri_coll = efi_stri_coll,
 320        .metai_match = efi_metai_match,
 321        .str_lwr = efi_str_lwr,
 322        .str_upr = efi_str_upr,
 323        .fat_to_str = efi_fat_to_str,
 324        .str_to_fat = efi_str_to_fat,
 325        .supported_languages = "en",
 326};
 327