linux/fs/hfsplus/unicode.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *  linux/fs/hfsplus/unicode.c
   4 *
   5 * Copyright (C) 2001
   6 * Brad Boyer (flar@allandria.com)
   7 * (C) 2003 Ardis Technologies <roman@ardistech.com>
   8 *
   9 * Handler routines for unicode strings
  10 */
  11
#include <linux/types.h>
#include <linux/nls.h>
#include <linux/printk.h>
#include "hfsplus_fs.h"
#include "hfsplus_raw.h"
  16
  17/* Fold the case of a unicode char, given the 16 bit value */
  18/* Returns folded char, or 0 if ignorable */
  19static inline u16 case_fold(u16 c)
  20{
  21        u16 tmp;
  22
  23        tmp = hfsplus_case_fold_table[c >> 8];
  24        if (tmp)
  25                tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
  26        else
  27                tmp = c;
  28        return tmp;
  29}
  30
  31/* Compare unicode strings, return values like normal strcmp */
  32int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
  33                       const struct hfsplus_unistr *s2)
  34{
  35        u16 len1, len2, c1, c2;
  36        const hfsplus_unichr *p1, *p2;
  37
  38        len1 = be16_to_cpu(s1->length);
  39        len2 = be16_to_cpu(s2->length);
  40        p1 = s1->unicode;
  41        p2 = s2->unicode;
  42
  43        while (1) {
  44                c1 = c2 = 0;
  45
  46                while (len1 && !c1) {
  47                        c1 = case_fold(be16_to_cpu(*p1));
  48                        p1++;
  49                        len1--;
  50                }
  51                while (len2 && !c2) {
  52                        c2 = case_fold(be16_to_cpu(*p2));
  53                        p2++;
  54                        len2--;
  55                }
  56
  57                if (c1 != c2)
  58                        return (c1 < c2) ? -1 : 1;
  59                if (!c1 && !c2)
  60                        return 0;
  61        }
  62}
  63
  64/* Compare names as a sequence of 16-bit unsigned integers */
  65int hfsplus_strcmp(const struct hfsplus_unistr *s1,
  66                   const struct hfsplus_unistr *s2)
  67{
  68        u16 len1, len2, c1, c2;
  69        const hfsplus_unichr *p1, *p2;
  70        int len;
  71
  72        len1 = be16_to_cpu(s1->length);
  73        len2 = be16_to_cpu(s2->length);
  74        p1 = s1->unicode;
  75        p2 = s2->unicode;
  76
  77        for (len = min(len1, len2); len > 0; len--) {
  78                c1 = be16_to_cpu(*p1);
  79                c2 = be16_to_cpu(*p2);
  80                if (c1 != c2)
  81                        return c1 < c2 ? -1 : 1;
  82                p1++;
  83                p2++;
  84        }
  85
  86        return len1 < len2 ? -1 :
  87               len1 > len2 ? 1 : 0;
  88}
  89
  90
  91#define Hangul_SBase    0xac00
  92#define Hangul_LBase    0x1100
  93#define Hangul_VBase    0x1161
  94#define Hangul_TBase    0x11a7
  95#define Hangul_SCount   11172
  96#define Hangul_LCount   19
  97#define Hangul_VCount   21
  98#define Hangul_TCount   28
  99#define Hangul_NCount   (Hangul_VCount * Hangul_TCount)
 100
 101
 102static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
 103{
 104        int i, s, e;
 105
 106        s = 1;
 107        e = p[1];
 108        if (!e || cc < p[s * 2] || cc > p[e * 2])
 109                return NULL;
 110        do {
 111                i = (s + e) / 2;
 112                if (cc > p[i * 2])
 113                        s = i + 1;
 114                else if (cc < p[i * 2])
 115                        e = i - 1;
 116                else
 117                        return hfsplus_compose_table + p[i * 2 + 1];
 118        } while (s <= e);
 119        return NULL;
 120}
 121
 122int hfsplus_uni2asc(struct super_block *sb,
 123                const struct hfsplus_unistr *ustr,
 124                char *astr, int *len_p)
 125{
 126        const hfsplus_unichr *ip;
 127        struct nls_table *nls = HFSPLUS_SB(sb)->nls;
 128        u8 *op;
 129        u16 cc, c0, c1;
 130        u16 *ce1, *ce2;
 131        int i, len, ustrlen, res, compose;
 132
 133        op = astr;
 134        ip = ustr->unicode;
 135        ustrlen = be16_to_cpu(ustr->length);
 136        len = *len_p;
 137        ce1 = NULL;
 138        compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
 139
 140        while (ustrlen > 0) {
 141                c0 = be16_to_cpu(*ip++);
 142                ustrlen--;
 143                /* search for single decomposed char */
 144                if (likely(compose))
 145                        ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
 146                if (ce1)
 147                        cc = ce1[0];
 148                else
 149                        cc = 0;
 150                if (cc) {
 151                        /* start of a possibly decomposed Hangul char */
 152                        if (cc != 0xffff)
 153                                goto done;
 154                        if (!ustrlen)
 155                                goto same;
 156                        c1 = be16_to_cpu(*ip) - Hangul_VBase;
 157                        if (c1 < Hangul_VCount) {
 158                                /* compose the Hangul char */
 159                                cc = (c0 - Hangul_LBase) * Hangul_VCount;
 160                                cc = (cc + c1) * Hangul_TCount;
 161                                cc += Hangul_SBase;
 162                                ip++;
 163                                ustrlen--;
 164                                if (!ustrlen)
 165                                        goto done;
 166                                c1 = be16_to_cpu(*ip) - Hangul_TBase;
 167                                if (c1 > 0 && c1 < Hangul_TCount) {
 168                                        cc += c1;
 169                                        ip++;
 170                                        ustrlen--;
 171                                }
 172                                goto done;
 173                        }
 174                }
 175                while (1) {
 176                        /* main loop for common case of not composed chars */
 177                        if (!ustrlen)
 178                                goto same;
 179                        c1 = be16_to_cpu(*ip);
 180                        if (likely(compose))
 181                                ce1 = hfsplus_compose_lookup(
 182                                        hfsplus_compose_table, c1);
 183                        if (ce1)
 184                                break;
 185                        switch (c0) {
 186                        case 0:
 187                                c0 = 0x2400;
 188                                break;
 189                        case '/':
 190                                c0 = ':';
 191                                break;
 192                        }
 193                        res = nls->uni2char(c0, op, len);
 194                        if (res < 0) {
 195                                if (res == -ENAMETOOLONG)
 196                                        goto out;
 197                                *op = '?';
 198                                res = 1;
 199                        }
 200                        op += res;
 201                        len -= res;
 202                        c0 = c1;
 203                        ip++;
 204                        ustrlen--;
 205                }
 206                ce2 = hfsplus_compose_lookup(ce1, c0);
 207                if (ce2) {
 208                        i = 1;
 209                        while (i < ustrlen) {
 210                                ce1 = hfsplus_compose_lookup(ce2,
 211                                        be16_to_cpu(ip[i]));
 212                                if (!ce1)
 213                                        break;
 214                                i++;
 215                                ce2 = ce1;
 216                        }
 217                        cc = ce2[0];
 218                        if (cc) {
 219                                ip += i;
 220                                ustrlen -= i;
 221                                goto done;
 222                        }
 223                }
 224same:
 225                switch (c0) {
 226                case 0:
 227                        cc = 0x2400;
 228                        break;
 229                case '/':
 230                        cc = ':';
 231                        break;
 232                default:
 233                        cc = c0;
 234                }
 235done:
 236                res = nls->uni2char(cc, op, len);
 237                if (res < 0) {
 238                        if (res == -ENAMETOOLONG)
 239                                goto out;
 240                        *op = '?';
 241                        res = 1;
 242                }
 243                op += res;
 244                len -= res;
 245        }
 246        res = 0;
 247out:
 248        *len_p = (char *)op - astr;
 249        return res;
 250}
 251
 252/*
 253 * Convert one or more ASCII characters into a single unicode character.
 254 * Returns the number of ASCII characters corresponding to the unicode char.
 255 */
 256static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
 257                              wchar_t *uc)
 258{
 259        int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
 260        if (size <= 0) {
 261                *uc = '?';
 262                size = 1;
 263        }
 264        switch (*uc) {
 265        case 0x2400:
 266                *uc = 0;
 267                break;
 268        case ':':
 269                *uc = '/';
 270                break;
 271        }
 272        return size;
 273}
 274
 275/* Decomposes a non-Hangul unicode character. */
 276static u16 *hfsplus_decompose_nonhangul(wchar_t uc, int *size)
 277{
 278        int off;
 279
 280        off = hfsplus_decompose_table[(uc >> 12) & 0xf];
 281        if (off == 0 || off == 0xffff)
 282                return NULL;
 283
 284        off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
 285        if (!off)
 286                return NULL;
 287
 288        off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
 289        if (!off)
 290                return NULL;
 291
 292        off = hfsplus_decompose_table[off + (uc & 0xf)];
 293        *size = off & 3;
 294        if (*size == 0)
 295                return NULL;
 296        return hfsplus_decompose_table + (off / 4);
 297}
 298
 299/*
 300 * Try to decompose a unicode character as Hangul. Return 0 if @uc is not
 301 * precomposed Hangul, otherwise return the length of the decomposition.
 302 *
 303 * This function was adapted from sample code from the Unicode Standard
 304 * Annex #15: Unicode Normalization Forms, version 3.2.0.
 305 *
 306 * Copyright (C) 1991-2018 Unicode, Inc.  All rights reserved.  Distributed
 307 * under the Terms of Use in http://www.unicode.org/copyright.html.
 308 */
 309static int hfsplus_try_decompose_hangul(wchar_t uc, u16 *result)
 310{
 311        int index;
 312        int l, v, t;
 313
 314        index = uc - Hangul_SBase;
 315        if (index < 0 || index >= Hangul_SCount)
 316                return 0;
 317
 318        l = Hangul_LBase + index / Hangul_NCount;
 319        v = Hangul_VBase + (index % Hangul_NCount) / Hangul_TCount;
 320        t = Hangul_TBase + index % Hangul_TCount;
 321
 322        result[0] = l;
 323        result[1] = v;
 324        if (t != Hangul_TBase) {
 325                result[2] = t;
 326                return 3;
 327        }
 328        return 2;
 329}
 330
 331/* Decomposes a single unicode character. */
 332static u16 *decompose_unichar(wchar_t uc, int *size, u16 *hangul_buffer)
 333{
 334        u16 *result;
 335
 336        /* Hangul is handled separately */
 337        result = hangul_buffer;
 338        *size = hfsplus_try_decompose_hangul(uc, result);
 339        if (*size == 0)
 340                result = hfsplus_decompose_nonhangul(uc, size);
 341        return result;
 342}
 343
 344int hfsplus_asc2uni(struct super_block *sb,
 345                    struct hfsplus_unistr *ustr, int max_unistr_len,
 346                    const char *astr, int len)
 347{
 348        int size, dsize, decompose;
 349        u16 *dstr, outlen = 0;
 350        wchar_t c;
 351        u16 dhangul[3];
 352
 353        decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
 354        while (outlen < max_unistr_len && len > 0) {
 355                size = asc2unichar(sb, astr, len, &c);
 356
 357                if (decompose)
 358                        dstr = decompose_unichar(c, &dsize, dhangul);
 359                else
 360                        dstr = NULL;
 361                if (dstr) {
 362                        if (outlen + dsize > max_unistr_len)
 363                                break;
 364                        do {
 365                                ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
 366                        } while (--dsize > 0);
 367                } else
 368                        ustr->unicode[outlen++] = cpu_to_be16(c);
 369
 370                astr += size;
 371                len -= size;
 372        }
 373        ustr->length = cpu_to_be16(outlen);
 374        if (len > 0)
 375                return -ENAMETOOLONG;
 376        return 0;
 377}
 378
 379/*
 380 * Hash a string to an integer as appropriate for the HFS+ filesystem.
 381 * Composed unicode characters are decomposed and case-folding is performed
 382 * if the appropriate bits are (un)set on the superblock.
 383 */
 384int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
 385{
 386        struct super_block *sb = dentry->d_sb;
 387        const char *astr;
 388        const u16 *dstr;
 389        int casefold, decompose, size, len;
 390        unsigned long hash;
 391        wchar_t c;
 392        u16 c2;
 393        u16 dhangul[3];
 394
 395        casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
 396        decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
 397        hash = init_name_hash(dentry);
 398        astr = str->name;
 399        len = str->len;
 400        while (len > 0) {
 401                int uninitialized_var(dsize);
 402                size = asc2unichar(sb, astr, len, &c);
 403                astr += size;
 404                len -= size;
 405
 406                if (decompose)
 407                        dstr = decompose_unichar(c, &dsize, dhangul);
 408                else
 409                        dstr = NULL;
 410                if (dstr) {
 411                        do {
 412                                c2 = *dstr++;
 413                                if (casefold)
 414                                        c2 = case_fold(c2);
 415                                if (!casefold || c2)
 416                                        hash = partial_name_hash(c2, hash);
 417                        } while (--dsize > 0);
 418                } else {
 419                        c2 = c;
 420                        if (casefold)
 421                                c2 = case_fold(c2);
 422                        if (!casefold || c2)
 423                                hash = partial_name_hash(c2, hash);
 424                }
 425        }
 426        str->hash = end_name_hash(hash);
 427
 428        return 0;
 429}
 430
 431/*
 432 * Compare strings with HFS+ filename ordering.
 433 * Composed unicode characters are decomposed and case-folding is performed
 434 * if the appropriate bits are (un)set on the superblock.
 435 */
 436int hfsplus_compare_dentry(const struct dentry *dentry,
 437                unsigned int len, const char *str, const struct qstr *name)
 438{
 439        struct super_block *sb = dentry->d_sb;
 440        int casefold, decompose, size;
 441        int dsize1, dsize2, len1, len2;
 442        const u16 *dstr1, *dstr2;
 443        const char *astr1, *astr2;
 444        u16 c1, c2;
 445        wchar_t c;
 446        u16 dhangul_1[3], dhangul_2[3];
 447
 448        casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
 449        decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
 450        astr1 = str;
 451        len1 = len;
 452        astr2 = name->name;
 453        len2 = name->len;
 454        dsize1 = dsize2 = 0;
 455        dstr1 = dstr2 = NULL;
 456
 457        while (len1 > 0 && len2 > 0) {
 458                if (!dsize1) {
 459                        size = asc2unichar(sb, astr1, len1, &c);
 460                        astr1 += size;
 461                        len1 -= size;
 462
 463                        if (decompose)
 464                                dstr1 = decompose_unichar(c, &dsize1,
 465                                                          dhangul_1);
 466                        if (!decompose || !dstr1) {
 467                                c1 = c;
 468                                dstr1 = &c1;
 469                                dsize1 = 1;
 470                        }
 471                }
 472
 473                if (!dsize2) {
 474                        size = asc2unichar(sb, astr2, len2, &c);
 475                        astr2 += size;
 476                        len2 -= size;
 477
 478                        if (decompose)
 479                                dstr2 = decompose_unichar(c, &dsize2,
 480                                                          dhangul_2);
 481                        if (!decompose || !dstr2) {
 482                                c2 = c;
 483                                dstr2 = &c2;
 484                                dsize2 = 1;
 485                        }
 486                }
 487
 488                c1 = *dstr1;
 489                c2 = *dstr2;
 490                if (casefold) {
 491                        c1 = case_fold(c1);
 492                        if (!c1) {
 493                                dstr1++;
 494                                dsize1--;
 495                                continue;
 496                        }
 497                        c2 = case_fold(c2);
 498                        if (!c2) {
 499                                dstr2++;
 500                                dsize2--;
 501                                continue;
 502                        }
 503                }
 504                if (c1 < c2)
 505                        return -1;
 506                else if (c1 > c2)
 507                        return 1;
 508
 509                dstr1++;
 510                dsize1--;
 511                dstr2++;
 512                dsize2--;
 513        }
 514
 515        if (len1 < len2)
 516                return -1;
 517        if (len1 > len2)
 518                return 1;
 519        return 0;
 520}
 521