linux/fs/hfsplus/unicode.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/hfsplus/unicode.c
   3 *
   4 * Copyright (C) 2001
   5 * Brad Boyer (flar@allandria.com)
   6 * (C) 2003 Ardis Technologies <roman@ardistech.com>
   7 *
   8 * Handler routines for unicode strings
   9 */
  10
  11#include <linux/types.h>
  12#include <linux/nls.h>
  13#include "hfsplus_fs.h"
  14#include "hfsplus_raw.h"
  15
  16/* Fold the case of a unicode char, given the 16 bit value */
  17/* Returns folded char, or 0 if ignorable */
  18static inline u16 case_fold(u16 c)
  19{
  20        u16 tmp;
  21
  22        tmp = hfsplus_case_fold_table[c >> 8];
  23        if (tmp)
  24                tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
  25        else
  26                tmp = c;
  27        return tmp;
  28}
  29
  30/* Compare unicode strings, return values like normal strcmp */
  31int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
  32                       const struct hfsplus_unistr *s2)
  33{
  34        u16 len1, len2, c1, c2;
  35        const hfsplus_unichr *p1, *p2;
  36
  37        len1 = be16_to_cpu(s1->length);
  38        len2 = be16_to_cpu(s2->length);
  39        p1 = s1->unicode;
  40        p2 = s2->unicode;
  41
  42        while (1) {
  43                c1 = c2 = 0;
  44
  45                while (len1 && !c1) {
  46                        c1 = case_fold(be16_to_cpu(*p1));
  47                        p1++;
  48                        len1--;
  49                }
  50                while (len2 && !c2) {
  51                        c2 = case_fold(be16_to_cpu(*p2));
  52                        p2++;
  53                        len2--;
  54                }
  55
  56                if (c1 != c2)
  57                        return (c1 < c2) ? -1 : 1;
  58                if (!c1 && !c2)
  59                        return 0;
  60        }
  61}
  62
  63/* Compare names as a sequence of 16-bit unsigned integers */
  64int hfsplus_strcmp(const struct hfsplus_unistr *s1,
  65                   const struct hfsplus_unistr *s2)
  66{
  67        u16 len1, len2, c1, c2;
  68        const hfsplus_unichr *p1, *p2;
  69        int len;
  70
  71        len1 = be16_to_cpu(s1->length);
  72        len2 = be16_to_cpu(s2->length);
  73        p1 = s1->unicode;
  74        p2 = s2->unicode;
  75
  76        for (len = min(len1, len2); len > 0; len--) {
  77                c1 = be16_to_cpu(*p1);
  78                c2 = be16_to_cpu(*p2);
  79                if (c1 != c2)
  80                        return c1 < c2 ? -1 : 1;
  81                p1++;
  82                p2++;
  83        }
  84
  85        return len1 < len2 ? -1 :
  86               len1 > len2 ? 1 : 0;
  87}
  88
  89
  90#define Hangul_SBase    0xac00
  91#define Hangul_LBase    0x1100
  92#define Hangul_VBase    0x1161
  93#define Hangul_TBase    0x11a7
  94#define Hangul_SCount   11172
  95#define Hangul_LCount   19
  96#define Hangul_VCount   21
  97#define Hangul_TCount   28
  98#define Hangul_NCount   (Hangul_VCount * Hangul_TCount)
  99
 100
 101static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
 102{
 103        int i, s, e;
 104
 105        s = 1;
 106        e = p[1];
 107        if (!e || cc < p[s * 2] || cc > p[e * 2])
 108                return NULL;
 109        do {
 110                i = (s + e) / 2;
 111                if (cc > p[i * 2])
 112                        s = i + 1;
 113                else if (cc < p[i * 2])
 114                        e = i - 1;
 115                else
 116                        return hfsplus_compose_table + p[i * 2 + 1];
 117        } while (s <= e);
 118        return NULL;
 119}
 120
 121int hfsplus_uni2asc(struct super_block *sb,
 122                const struct hfsplus_unistr *ustr,
 123                char *astr, int *len_p)
 124{
 125        const hfsplus_unichr *ip;
 126        struct nls_table *nls = HFSPLUS_SB(sb)->nls;
 127        u8 *op;
 128        u16 cc, c0, c1;
 129        u16 *ce1, *ce2;
 130        int i, len, ustrlen, res, compose;
 131
 132        op = astr;
 133        ip = ustr->unicode;
 134        ustrlen = be16_to_cpu(ustr->length);
 135        len = *len_p;
 136        ce1 = NULL;
 137        compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
 138
 139        while (ustrlen > 0) {
 140                c0 = be16_to_cpu(*ip++);
 141                ustrlen--;
 142                /* search for single decomposed char */
 143                if (likely(compose))
 144                        ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
 145                if (ce1 && (cc = ce1[0])) {
 146                        /* start of a possibly decomposed Hangul char */
 147                        if (cc != 0xffff)
 148                                goto done;
 149                        if (!ustrlen)
 150                                goto same;
 151                        c1 = be16_to_cpu(*ip) - Hangul_VBase;
 152                        if (c1 < Hangul_VCount) {
 153                                /* compose the Hangul char */
 154                                cc = (c0 - Hangul_LBase) * Hangul_VCount;
 155                                cc = (cc + c1) * Hangul_TCount;
 156                                cc += Hangul_SBase;
 157                                ip++;
 158                                ustrlen--;
 159                                if (!ustrlen)
 160                                        goto done;
 161                                c1 = be16_to_cpu(*ip) - Hangul_TBase;
 162                                if (c1 > 0 && c1 < Hangul_TCount) {
 163                                        cc += c1;
 164                                        ip++;
 165                                        ustrlen--;
 166                                }
 167                                goto done;
 168                        }
 169                }
 170                while (1) {
 171                        /* main loop for common case of not composed chars */
 172                        if (!ustrlen)
 173                                goto same;
 174                        c1 = be16_to_cpu(*ip);
 175                        if (likely(compose))
 176                                ce1 = hfsplus_compose_lookup(
 177                                        hfsplus_compose_table, c1);
 178                        if (ce1)
 179                                break;
 180                        switch (c0) {
 181                        case 0:
 182                                c0 = 0x2400;
 183                                break;
 184                        case '/':
 185                                c0 = ':';
 186                                break;
 187                        }
 188                        res = nls->uni2char(c0, op, len);
 189                        if (res < 0) {
 190                                if (res == -ENAMETOOLONG)
 191                                        goto out;
 192                                *op = '?';
 193                                res = 1;
 194                        }
 195                        op += res;
 196                        len -= res;
 197                        c0 = c1;
 198                        ip++;
 199                        ustrlen--;
 200                }
 201                ce2 = hfsplus_compose_lookup(ce1, c0);
 202                if (ce2) {
 203                        i = 1;
 204                        while (i < ustrlen) {
 205                                ce1 = hfsplus_compose_lookup(ce2,
 206                                        be16_to_cpu(ip[i]));
 207                                if (!ce1)
 208                                        break;
 209                                i++;
 210                                ce2 = ce1;
 211                        }
 212                        if ((cc = ce2[0])) {
 213                                ip += i;
 214                                ustrlen -= i;
 215                                goto done;
 216                        }
 217                }
 218same:
 219                switch (c0) {
 220                case 0:
 221                        cc = 0x2400;
 222                        break;
 223                case '/':
 224                        cc = ':';
 225                        break;
 226                default:
 227                        cc = c0;
 228                }
 229done:
 230                res = nls->uni2char(cc, op, len);
 231                if (res < 0) {
 232                        if (res == -ENAMETOOLONG)
 233                                goto out;
 234                        *op = '?';
 235                        res = 1;
 236                }
 237                op += res;
 238                len -= res;
 239        }
 240        res = 0;
 241out:
 242        *len_p = (char *)op - astr;
 243        return res;
 244}
 245
 246/*
 247 * Convert one or more ASCII characters into a single unicode character.
 248 * Returns the number of ASCII characters corresponding to the unicode char.
 249 */
 250static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
 251                              wchar_t *uc)
 252{
 253        int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
 254        if (size <= 0) {
 255                *uc = '?';
 256                size = 1;
 257        }
 258        switch (*uc) {
 259        case 0x2400:
 260                *uc = 0;
 261                break;
 262        case ':':
 263                *uc = '/';
 264                break;
 265        }
 266        return size;
 267}
 268
 269/* Decomposes a single unicode character. */
 270static inline u16 *decompose_unichar(wchar_t uc, int *size)
 271{
 272        int off;
 273
 274        off = hfsplus_decompose_table[(uc >> 12) & 0xf];
 275        if (off == 0 || off == 0xffff)
 276                return NULL;
 277
 278        off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
 279        if (!off)
 280                return NULL;
 281
 282        off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
 283        if (!off)
 284                return NULL;
 285
 286        off = hfsplus_decompose_table[off + (uc & 0xf)];
 287        *size = off & 3;
 288        if (*size == 0)
 289                return NULL;
 290        return hfsplus_decompose_table + (off / 4);
 291}
 292
 293int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
 294                    const char *astr, int len)
 295{
 296        int size, dsize, decompose;
 297        u16 *dstr, outlen = 0;
 298        wchar_t c;
 299
 300        decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
 301        while (outlen < HFSPLUS_MAX_STRLEN && len > 0) {
 302                size = asc2unichar(sb, astr, len, &c);
 303
 304                if (decompose && (dstr = decompose_unichar(c, &dsize))) {
 305                        if (outlen + dsize > HFSPLUS_MAX_STRLEN)
 306                                break;
 307                        do {
 308                                ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
 309                        } while (--dsize > 0);
 310                } else
 311                        ustr->unicode[outlen++] = cpu_to_be16(c);
 312
 313                astr += size;
 314                len -= size;
 315        }
 316        ustr->length = cpu_to_be16(outlen);
 317        if (len > 0)
 318                return -ENAMETOOLONG;
 319        return 0;
 320}
 321
 322/*
 323 * Hash a string to an integer as appropriate for the HFS+ filesystem.
 324 * Composed unicode characters are decomposed and case-folding is performed
 325 * if the appropriate bits are (un)set on the superblock.
 326 */
 327int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
 328                struct qstr *str)
 329{
 330        struct super_block *sb = dentry->d_sb;
 331        const char *astr;
 332        const u16 *dstr;
 333        int casefold, decompose, size, len;
 334        unsigned long hash;
 335        wchar_t c;
 336        u16 c2;
 337
 338        casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
 339        decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
 340        hash = init_name_hash();
 341        astr = str->name;
 342        len = str->len;
 343        while (len > 0) {
 344                int uninitialized_var(dsize);
 345                size = asc2unichar(sb, astr, len, &c);
 346                astr += size;
 347                len -= size;
 348
 349                if (decompose && (dstr = decompose_unichar(c, &dsize))) {
 350                        do {
 351                                c2 = *dstr++;
 352                                if (!casefold || (c2 = case_fold(c2)))
 353                                        hash = partial_name_hash(c2, hash);
 354                        } while (--dsize > 0);
 355                } else {
 356                        c2 = c;
 357                        if (!casefold || (c2 = case_fold(c2)))
 358                                hash = partial_name_hash(c2, hash);
 359                }
 360        }
 361        str->hash = end_name_hash(hash);
 362
 363        return 0;
 364}
 365
 366/*
 367 * Compare strings with HFS+ filename ordering.
 368 * Composed unicode characters are decomposed and case-folding is performed
 369 * if the appropriate bits are (un)set on the superblock.
 370 */
 371int hfsplus_compare_dentry(const struct dentry *parent,
 372                const struct inode *pinode,
 373                const struct dentry *dentry, const struct inode *inode,
 374                unsigned int len, const char *str, const struct qstr *name)
 375{
 376        struct super_block *sb = parent->d_sb;
 377        int casefold, decompose, size;
 378        int dsize1, dsize2, len1, len2;
 379        const u16 *dstr1, *dstr2;
 380        const char *astr1, *astr2;
 381        u16 c1, c2;
 382        wchar_t c;
 383
 384        casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
 385        decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
 386        astr1 = str;
 387        len1 = len;
 388        astr2 = name->name;
 389        len2 = name->len;
 390        dsize1 = dsize2 = 0;
 391        dstr1 = dstr2 = NULL;
 392
 393        while (len1 > 0 && len2 > 0) {
 394                if (!dsize1) {
 395                        size = asc2unichar(sb, astr1, len1, &c);
 396                        astr1 += size;
 397                        len1 -= size;
 398
 399                        if (decompose)
 400                                dstr1 = decompose_unichar(c, &dsize1);
 401                        if (!decompose || !dstr1) {
 402                                c1 = c;
 403                                dstr1 = &c1;
 404                                dsize1 = 1;
 405                        }
 406                }
 407
 408                if (!dsize2) {
 409                        size = asc2unichar(sb, astr2, len2, &c);
 410                        astr2 += size;
 411                        len2 -= size;
 412
 413                        if (decompose)
 414                                dstr2 = decompose_unichar(c, &dsize2);
 415                        if (!decompose || !dstr2) {
 416                                c2 = c;
 417                                dstr2 = &c2;
 418                                dsize2 = 1;
 419                        }
 420                }
 421
 422                c1 = *dstr1;
 423                c2 = *dstr2;
 424                if (casefold) {
 425                        if  (!(c1 = case_fold(c1))) {
 426                                dstr1++;
 427                                dsize1--;
 428                                continue;
 429                        }
 430                        if (!(c2 = case_fold(c2))) {
 431                                dstr2++;
 432                                dsize2--;
 433                                continue;
 434                        }
 435                }
 436                if (c1 < c2)
 437                        return -1;
 438                else if (c1 > c2)
 439                        return 1;
 440
 441                dstr1++;
 442                dsize1--;
 443                dstr2++;
 444                dsize2--;
 445        }
 446
 447        if (len1 < len2)
 448                return -1;
 449        if (len1 > len2)
 450                return 1;
 451        return 0;
 452}
 453