linux/fs/unicode/utf8-core.c
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0 */
   2#include <linux/module.h>
   3#include <linux/kernel.h>
   4#include <linux/string.h>
   5#include <linux/slab.h>
   6#include <linux/parser.h>
   7#include <linux/errno.h>
   8#include <linux/unicode.h>
   9#include <linux/stringhash.h>
  10
  11#include "utf8n.h"
  12
  13int utf8_validate(const struct unicode_map *um, const struct qstr *str)
  14{
  15        const struct utf8data *data = utf8nfdi(um->version);
  16
  17        if (utf8nlen(data, str->name, str->len) < 0)
  18                return -1;
  19        return 0;
  20}
  21EXPORT_SYMBOL(utf8_validate);
  22
  23int utf8_strncmp(const struct unicode_map *um,
  24                 const struct qstr *s1, const struct qstr *s2)
  25{
  26        const struct utf8data *data = utf8nfdi(um->version);
  27        struct utf8cursor cur1, cur2;
  28        int c1, c2;
  29
  30        if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
  31                return -EINVAL;
  32
  33        if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
  34                return -EINVAL;
  35
  36        do {
  37                c1 = utf8byte(&cur1);
  38                c2 = utf8byte(&cur2);
  39
  40                if (c1 < 0 || c2 < 0)
  41                        return -EINVAL;
  42                if (c1 != c2)
  43                        return 1;
  44        } while (c1);
  45
  46        return 0;
  47}
  48EXPORT_SYMBOL(utf8_strncmp);
  49
  50int utf8_strncasecmp(const struct unicode_map *um,
  51                     const struct qstr *s1, const struct qstr *s2)
  52{
  53        const struct utf8data *data = utf8nfdicf(um->version);
  54        struct utf8cursor cur1, cur2;
  55        int c1, c2;
  56
  57        if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
  58                return -EINVAL;
  59
  60        if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
  61                return -EINVAL;
  62
  63        do {
  64                c1 = utf8byte(&cur1);
  65                c2 = utf8byte(&cur2);
  66
  67                if (c1 < 0 || c2 < 0)
  68                        return -EINVAL;
  69                if (c1 != c2)
  70                        return 1;
  71        } while (c1);
  72
  73        return 0;
  74}
  75EXPORT_SYMBOL(utf8_strncasecmp);
  76
  77/* String cf is expected to be a valid UTF-8 casefolded
  78 * string.
  79 */
  80int utf8_strncasecmp_folded(const struct unicode_map *um,
  81                            const struct qstr *cf,
  82                            const struct qstr *s1)
  83{
  84        const struct utf8data *data = utf8nfdicf(um->version);
  85        struct utf8cursor cur1;
  86        int c1, c2;
  87        int i = 0;
  88
  89        if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
  90                return -EINVAL;
  91
  92        do {
  93                c1 = utf8byte(&cur1);
  94                c2 = cf->name[i++];
  95                if (c1 < 0)
  96                        return -EINVAL;
  97                if (c1 != c2)
  98                        return 1;
  99        } while (c1);
 100
 101        return 0;
 102}
 103EXPORT_SYMBOL(utf8_strncasecmp_folded);
 104
 105int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
 106                  unsigned char *dest, size_t dlen)
 107{
 108        const struct utf8data *data = utf8nfdicf(um->version);
 109        struct utf8cursor cur;
 110        size_t nlen = 0;
 111
 112        if (utf8ncursor(&cur, data, str->name, str->len) < 0)
 113                return -EINVAL;
 114
 115        for (nlen = 0; nlen < dlen; nlen++) {
 116                int c = utf8byte(&cur);
 117
 118                dest[nlen] = c;
 119                if (!c)
 120                        return nlen;
 121                if (c == -1)
 122                        break;
 123        }
 124        return -EINVAL;
 125}
 126EXPORT_SYMBOL(utf8_casefold);
 127
 128int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
 129                       struct qstr *str)
 130{
 131        const struct utf8data *data = utf8nfdicf(um->version);
 132        struct utf8cursor cur;
 133        int c;
 134        unsigned long hash = init_name_hash(salt);
 135
 136        if (utf8ncursor(&cur, data, str->name, str->len) < 0)
 137                return -EINVAL;
 138
 139        while ((c = utf8byte(&cur))) {
 140                if (c < 0)
 141                        return -EINVAL;
 142                hash = partial_name_hash((unsigned char)c, hash);
 143        }
 144        str->hash = end_name_hash(hash);
 145        return 0;
 146}
 147EXPORT_SYMBOL(utf8_casefold_hash);
 148
 149int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
 150                   unsigned char *dest, size_t dlen)
 151{
 152        const struct utf8data *data = utf8nfdi(um->version);
 153        struct utf8cursor cur;
 154        ssize_t nlen = 0;
 155
 156        if (utf8ncursor(&cur, data, str->name, str->len) < 0)
 157                return -EINVAL;
 158
 159        for (nlen = 0; nlen < dlen; nlen++) {
 160                int c = utf8byte(&cur);
 161
 162                dest[nlen] = c;
 163                if (!c)
 164                        return nlen;
 165                if (c == -1)
 166                        break;
 167        }
 168        return -EINVAL;
 169}
 170
 171EXPORT_SYMBOL(utf8_normalize);
 172
 173static int utf8_parse_version(const char *version, unsigned int *maj,
 174                              unsigned int *min, unsigned int *rev)
 175{
 176        substring_t args[3];
 177        char version_string[12];
 178        static const struct match_token token[] = {
 179                {1, "%d.%d.%d"},
 180                {0, NULL}
 181        };
 182
 183        strncpy(version_string, version, sizeof(version_string));
 184
 185        if (match_token(version_string, token, args) != 1)
 186                return -EINVAL;
 187
 188        if (match_int(&args[0], maj) || match_int(&args[1], min) ||
 189            match_int(&args[2], rev))
 190                return -EINVAL;
 191
 192        return 0;
 193}
 194
 195struct unicode_map *utf8_load(const char *version)
 196{
 197        struct unicode_map *um = NULL;
 198        int unicode_version;
 199
 200        if (version) {
 201                unsigned int maj, min, rev;
 202
 203                if (utf8_parse_version(version, &maj, &min, &rev) < 0)
 204                        return ERR_PTR(-EINVAL);
 205
 206                if (!utf8version_is_supported(maj, min, rev))
 207                        return ERR_PTR(-EINVAL);
 208
 209                unicode_version = UNICODE_AGE(maj, min, rev);
 210        } else {
 211                unicode_version = utf8version_latest();
 212                printk(KERN_WARNING"UTF-8 version not specified. "
 213                       "Assuming latest supported version (%d.%d.%d).",
 214                       (unicode_version >> 16) & 0xff,
 215                       (unicode_version >> 8) & 0xff,
 216                       (unicode_version & 0xff));
 217        }
 218
 219        um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL);
 220        if (!um)
 221                return ERR_PTR(-ENOMEM);
 222
 223        um->charset = "UTF-8";
 224        um->version = unicode_version;
 225
 226        return um;
 227}
 228EXPORT_SYMBOL(utf8_load);
 229
 230void utf8_unload(struct unicode_map *um)
 231{
 232        kfree(um);
 233}
 234EXPORT_SYMBOL(utf8_unload);
 235
 236MODULE_LICENSE("GPL v2");
 237