linux/fs/cifs/cifs_unicode.h
<<
>>
Prefs
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * cifs_unicode:  Unicode kernel case support
 *
 * Function:
 *     Convert a unicode character to upper or lower case using
 *     compressed tables.
 *
 *   Copyright (c) International Business Machines  Corp., 2000,2009
 *
 * Notes:
 *     These APIs are based on the C library functions.  The semantics
 *     should match the C functions but with expanded size operands.
 *
 *     The upper/lower functions are based on a table created by mkupr.
 *     This is a compressed table of upper and lower case conversion.
 */
  18#ifndef _CIFS_UNICODE_H
  19#define _CIFS_UNICODE_H
  20
  21#include <asm/byteorder.h>
  22#include <linux/types.h>
  23#include <linux/nls.h>
  24
  25#define  UNIUPR_NOLOWER         /* Example to not expand lower case tables */
  26
  27/*
  28 * Windows maps these to the user defined 16 bit Unicode range since they are
  29 * reserved symbols (along with \ and /), otherwise illegal to store
  30 * in filenames in NTFS
  31 */
  32#define UNI_ASTERISK    (__u16) ('*' + 0xF000)
  33#define UNI_QUESTION    (__u16) ('?' + 0xF000)
  34#define UNI_COLON       (__u16) (':' + 0xF000)
  35#define UNI_GRTRTHAN    (__u16) ('>' + 0xF000)
  36#define UNI_LESSTHAN    (__u16) ('<' + 0xF000)
  37#define UNI_PIPE        (__u16) ('|' + 0xF000)
  38#define UNI_SLASH       (__u16) ('\\' + 0xF000)
  39
  40/*
  41 * Macs use an older "SFM" mapping of the symbols above. Fortunately it does
  42 * not conflict (although almost does) with the mapping above.
  43 */
  44
  45#define SFM_DOUBLEQUOTE ((__u16) 0xF020)
  46#define SFM_ASTERISK    ((__u16) 0xF021)
  47#define SFM_QUESTION    ((__u16) 0xF025)
  48#define SFM_COLON       ((__u16) 0xF022)
  49#define SFM_GRTRTHAN    ((__u16) 0xF024)
  50#define SFM_LESSTHAN    ((__u16) 0xF023)
  51#define SFM_PIPE        ((__u16) 0xF027)
  52#define SFM_SLASH       ((__u16) 0xF026)
  53#define SFM_SPACE       ((__u16) 0xF028)
  54#define SFM_PERIOD      ((__u16) 0xF029)
  55
  56/*
  57 * Mapping mechanism to use when one of the seven reserved characters is
  58 * encountered.  We can only map using one of the mechanisms at a time
  59 * since otherwise readdir could return directory entries which we would
  60 * not be able to open
  61 *
  62 * NO_MAP_UNI_RSVD  = do not perform any remapping of the character
  63 * SFM_MAP_UNI_RSVD = map reserved characters using SFM scheme (MAC compatible)
  64 * SFU_MAP_UNI_RSVD = map reserved characters ala SFU ("mapchars" option)
  65 *
  66 */
  67#define NO_MAP_UNI_RSVD         0
  68#define SFM_MAP_UNI_RSVD        1
  69#define SFU_MAP_UNI_RSVD        2
  70
  71/* Just define what we want from uniupr.h.  We don't want to define the tables
  72 * in each source file.
  73 */
  74#ifndef UNICASERANGE_DEFINED
  75struct UniCaseRange {
  76        wchar_t start;
  77        wchar_t end;
  78        signed char *table;
  79};
  80#endif                          /* UNICASERANGE_DEFINED */
  81
  82#ifndef UNIUPR_NOUPPER
  83extern signed char CifsUniUpperTable[512];
  84extern const struct UniCaseRange CifsUniUpperRange[];
  85#endif                          /* UNIUPR_NOUPPER */
  86
  87#ifndef UNIUPR_NOLOWER
  88extern signed char CifsUniLowerTable[512];
  89extern const struct UniCaseRange CifsUniLowerRange[];
  90#endif                          /* UNIUPR_NOLOWER */
  91
  92#ifdef __KERNEL__
  93int cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
  94                    const struct nls_table *cp, int map_type);
  95int cifs_utf16_bytes(const __le16 *from, int maxbytes,
  96                     const struct nls_table *codepage);
  97int cifs_strtoUTF16(__le16 *, const char *, int, const struct nls_table *);
  98char *cifs_strndup_from_utf16(const char *src, const int maxlen,
  99                              const bool is_unicode,
 100                              const struct nls_table *codepage);
 101extern int cifsConvertToUTF16(__le16 *target, const char *source, int maxlen,
 102                              const struct nls_table *cp, int mapChars);
 103extern int cifs_remap(struct cifs_sb_info *cifs_sb);
 104extern __le16 *cifs_strndup_to_utf16(const char *src, const int maxlen,
 105                                     int *utf16_len, const struct nls_table *cp,
 106                                     int remap);
 107#endif
 108
 109wchar_t cifs_toupper(wchar_t in);
 110
 111/*
 112 * UniStrcat:  Concatenate the second string to the first
 113 *
 114 * Returns:
 115 *     Address of the first string
 116 */
 117static inline __le16 *
 118UniStrcat(__le16 *ucs1, const __le16 *ucs2)
 119{
 120        __le16 *anchor = ucs1;  /* save a pointer to start of ucs1 */
 121
 122        while (*ucs1++) ;       /* To end of first string */
 123        ucs1--;                 /* Return to the null */
 124        while ((*ucs1++ = *ucs2++)) ;   /* copy string 2 over */
 125        return anchor;
 126}
 127
 128/*
 129 * UniStrchr:  Find a character in a string
 130 *
 131 * Returns:
 132 *     Address of first occurrence of character in string
 133 *     or NULL if the character is not in the string
 134 */
 135static inline wchar_t *
 136UniStrchr(const wchar_t *ucs, wchar_t uc)
 137{
 138        while ((*ucs != uc) && *ucs)
 139                ucs++;
 140
 141        if (*ucs == uc)
 142                return (wchar_t *) ucs;
 143        return NULL;
 144}
 145
 146/*
 147 * UniStrcmp:  Compare two strings
 148 *
 149 * Returns:
 150 *     < 0:  First string is less than second
 151 *     = 0:  Strings are equal
 152 *     > 0:  First string is greater than second
 153 */
 154static inline int
 155UniStrcmp(const wchar_t *ucs1, const wchar_t *ucs2)
 156{
 157        while ((*ucs1 == *ucs2) && *ucs1) {
 158                ucs1++;
 159                ucs2++;
 160        }
 161        return (int) *ucs1 - (int) *ucs2;
 162}
 163
 164/*
 165 * UniStrcpy:  Copy a string
 166 */
 167static inline wchar_t *
 168UniStrcpy(wchar_t *ucs1, const wchar_t *ucs2)
 169{
 170        wchar_t *anchor = ucs1; /* save the start of result string */
 171
 172        while ((*ucs1++ = *ucs2++)) ;
 173        return anchor;
 174}
 175
 176/*
 177 * UniStrlen:  Return the length of a string (in 16 bit Unicode chars not bytes)
 178 */
 179static inline size_t
 180UniStrlen(const wchar_t *ucs1)
 181{
 182        int i = 0;
 183
 184        while (*ucs1++)
 185                i++;
 186        return i;
 187}
 188
 189/*
 190 * UniStrnlen:  Return the length (in 16 bit Unicode chars not bytes) of a
 191 *              string (length limited)
 192 */
 193static inline size_t
 194UniStrnlen(const wchar_t *ucs1, int maxlen)
 195{
 196        int i = 0;
 197
 198        while (*ucs1++) {
 199                i++;
 200                if (i >= maxlen)
 201                        break;
 202        }
 203        return i;
 204}
 205
 206/*
 207 * UniStrncat:  Concatenate length limited string
 208 */
 209static inline wchar_t *
 210UniStrncat(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
 211{
 212        wchar_t *anchor = ucs1; /* save pointer to string 1 */
 213
 214        while (*ucs1++) ;
 215        ucs1--;                 /* point to null terminator of s1 */
 216        while (n-- && (*ucs1 = *ucs2)) {        /* copy s2 after s1 */
 217                ucs1++;
 218                ucs2++;
 219        }
 220        *ucs1 = 0;              /* Null terminate the result */
 221        return (anchor);
 222}
 223
 224/*
 225 * UniStrncmp:  Compare length limited string
 226 */
 227static inline int
 228UniStrncmp(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
 229{
 230        if (!n)
 231                return 0;       /* Null strings are equal */
 232        while ((*ucs1 == *ucs2) && *ucs1 && --n) {
 233                ucs1++;
 234                ucs2++;
 235        }
 236        return (int) *ucs1 - (int) *ucs2;
 237}
 238
 239/*
 240 * UniStrncmp_le:  Compare length limited string - native to little-endian
 241 */
 242static inline int
 243UniStrncmp_le(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
 244{
 245        if (!n)
 246                return 0;       /* Null strings are equal */
 247        while ((*ucs1 == __le16_to_cpu(*ucs2)) && *ucs1 && --n) {
 248                ucs1++;
 249                ucs2++;
 250        }
 251        return (int) *ucs1 - (int) __le16_to_cpu(*ucs2);
 252}
 253
 254/*
 255 * UniStrncpy:  Copy length limited string with pad
 256 */
 257static inline wchar_t *
 258UniStrncpy(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
 259{
 260        wchar_t *anchor = ucs1;
 261
 262        while (n-- && *ucs2)    /* Copy the strings */
 263                *ucs1++ = *ucs2++;
 264
 265        n++;
 266        while (n--)             /* Pad with nulls */
 267                *ucs1++ = 0;
 268        return anchor;
 269}
 270
 271/*
 272 * UniStrncpy_le:  Copy length limited string with pad to little-endian
 273 */
 274static inline wchar_t *
 275UniStrncpy_le(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
 276{
 277        wchar_t *anchor = ucs1;
 278
 279        while (n-- && *ucs2)    /* Copy the strings */
 280                *ucs1++ = __le16_to_cpu(*ucs2++);
 281
 282        n++;
 283        while (n--)             /* Pad with nulls */
 284                *ucs1++ = 0;
 285        return anchor;
 286}
 287
 288/*
 289 * UniStrstr:  Find a string in a string
 290 *
 291 * Returns:
 292 *     Address of first match found
 293 *     NULL if no matching string is found
 294 */
 295static inline wchar_t *
 296UniStrstr(const wchar_t *ucs1, const wchar_t *ucs2)
 297{
 298        const wchar_t *anchor1 = ucs1;
 299        const wchar_t *anchor2 = ucs2;
 300
 301        while (*ucs1) {
 302                if (*ucs1 == *ucs2) {
 303                        /* Partial match found */
 304                        ucs1++;
 305                        ucs2++;
 306                } else {
 307                        if (!*ucs2)     /* Match found */
 308                                return (wchar_t *) anchor1;
 309                        ucs1 = ++anchor1;       /* No match */
 310                        ucs2 = anchor2;
 311                }
 312        }
 313
 314        if (!*ucs2)             /* Both end together */
 315                return (wchar_t *) anchor1;     /* Match found */
 316        return NULL;            /* No match */
 317}
 318
 319#ifndef UNIUPR_NOUPPER
 320/*
 321 * UniToupper:  Convert a unicode character to upper case
 322 */
 323static inline wchar_t
 324UniToupper(register wchar_t uc)
 325{
 326        register const struct UniCaseRange *rp;
 327
 328        if (uc < sizeof(CifsUniUpperTable)) {
 329                /* Latin characters */
 330                return uc + CifsUniUpperTable[uc];      /* Use base tables */
 331        } else {
 332                rp = CifsUniUpperRange; /* Use range tables */
 333                while (rp->start) {
 334                        if (uc < rp->start)     /* Before start of range */
 335                                return uc;      /* Uppercase = input */
 336                        if (uc <= rp->end)      /* In range */
 337                                return uc + rp->table[uc - rp->start];
 338                        rp++;   /* Try next range */
 339                }
 340        }
 341        return uc;              /* Past last range */
 342}
 343
 344/*
 345 * UniStrupr:  Upper case a unicode string
 346 */
 347static inline __le16 *
 348UniStrupr(register __le16 *upin)
 349{
 350        register __le16 *up;
 351
 352        up = upin;
 353        while (*up) {           /* For all characters */
 354                *up = cpu_to_le16(UniToupper(le16_to_cpu(*up)));
 355                up++;
 356        }
 357        return upin;            /* Return input pointer */
 358}
 359#endif                          /* UNIUPR_NOUPPER */
 360
 361#ifndef UNIUPR_NOLOWER
 362/*
 363 * UniTolower:  Convert a unicode character to lower case
 364 */
 365static inline wchar_t
 366UniTolower(register wchar_t uc)
 367{
 368        register const struct UniCaseRange *rp;
 369
 370        if (uc < sizeof(CifsUniLowerTable)) {
 371                /* Latin characters */
 372                return uc + CifsUniLowerTable[uc];      /* Use base tables */
 373        } else {
 374                rp = CifsUniLowerRange; /* Use range tables */
 375                while (rp->start) {
 376                        if (uc < rp->start)     /* Before start of range */
 377                                return uc;      /* Uppercase = input */
 378                        if (uc <= rp->end)      /* In range */
 379                                return uc + rp->table[uc - rp->start];
 380                        rp++;   /* Try next range */
 381                }
 382        }
 383        return uc;              /* Past last range */
 384}
 385
 386/*
 387 * UniStrlwr:  Lower case a unicode string
 388 */
 389static inline wchar_t *
 390UniStrlwr(register wchar_t *upin)
 391{
 392        register wchar_t *up;
 393
 394        up = upin;
 395        while (*up) {           /* For all characters */
 396                *up = UniTolower(*up);
 397                up++;
 398        }
 399        return upin;            /* Return input pointer */
 400}
 401
 402#endif
 403
 404#endif /* _CIFS_UNICODE_H */
 405