1
2
3
4
5
6
7
8
9
10
11#include <linux/module.h>
12#include <linux/string.h>
13#include <linux/nls.h>
14#include <linux/kernel.h>
15#include <linux/errno.h>
16#include <linux/kmod.h>
17#include <linux/spinlock.h>
18#include <asm/byteorder.h>
19
/*
 * Registry state.
 *
 * default_table is only declared here (tentative definition) so that
 * `tables` can point at it; its initializer appears later in this file.
 * `tables` is the head of a singly linked list (chained through ->next)
 * of every registered NLS table; the built-in default table is its
 * initial, and therefore last, element.
 * nls_lock is taken around every walk or modification of that list.
 */
static struct nls_table default_table;
static struct nls_table *tables = &default_table;
static DEFINE_SPINLOCK(nls_lock);
23
24
25
26
27
/*
 * One row per UTF-8 sequence length; used by utf8_to_utf32() and
 * utf32_to_utf8().
 */
struct utf8_table {
	int     cmask;	/* mask applied to the lead byte ...              */
	int     cval;	/* ... and the value it must equal for this length */
	int     shift;	/* bit position of the payload carried by the lead byte */
	long    lmask;	/* payload mask == largest value this length can hold */
	long    lval;	/* smallest value that legitimately needs this length */
};
35
/*
 * Row N (counting from 1) describes an N-byte sequence.  The all-zero
 * final row (cmask == 0) terminates the loops that walk this table.
 */
static const struct utf8_table utf8_table[] =
{
    {0x80,  0x00,   0*6,    0x7F,           0,         /* 1 byte sequence */},
    {0xE0,  0xC0,   1*6,    0x7FF,          0x80,      /* 2 byte sequence */},
    {0xF0,  0xE0,   2*6,    0xFFFF,         0x800,     /* 3 byte sequence */},
    {0xF8,  0xF0,   3*6,    0x1FFFFF,       0x10000,   /* 4 byte sequence */},
    {0xFC,  0xF8,   4*6,    0x3FFFFFF,      0x200000,  /* 5 byte sequence */},
    {0xFE,  0xFC,   5*6,    0x7FFFFFFF,     0x4000000, /* 6 byte sequence */},
    {0,						       /* end of table    */}
};
46
/* Largest code point the converters in this file accept. */
#define UNICODE_MAX	0x0010ffff
/* Code points at or above this value are written as a UTF-16 surrogate pair. */
#define PLANE_SIZE	0x00010000

/* (x & SURROGATE_MASK) == SURROGATE_PAIR  <=>  x is in 0xd800..0xdfff */
#define SURROGATE_MASK	0xfffff800
#define SURROGATE_PAIR	0x0000d800
/* Bit that is set in the second (low) surrogate, 0xdc00..0xdfff */
#define SURROGATE_LOW	0x00000400
/* The 10 payload bits carried by each surrogate */
#define SURROGATE_BITS	0x000003ff
54
55int utf8_to_utf32(const u8 *s, int len, unicode_t *pu)
56{
57 unsigned long l;
58 int c0, c, nc;
59 const struct utf8_table *t;
60
61 nc = 0;
62 c0 = *s;
63 l = c0;
64 for (t = utf8_table; t->cmask; t++) {
65 nc++;
66 if ((c0 & t->cmask) == t->cval) {
67 l &= t->lmask;
68 if (l < t->lval || l > UNICODE_MAX ||
69 (l & SURROGATE_MASK) == SURROGATE_PAIR)
70 return -1;
71 *pu = (unicode_t) l;
72 return nc;
73 }
74 if (len <= nc)
75 return -1;
76 s++;
77 c = (*s ^ 0x80) & 0xFF;
78 if (c & 0xC0)
79 return -1;
80 l = (l << 6) | c;
81 }
82 return -1;
83}
84EXPORT_SYMBOL(utf8_to_utf32);
85
86int utf32_to_utf8(unicode_t u, u8 *s, int maxlen)
87{
88 unsigned long l;
89 int c, nc;
90 const struct utf8_table *t;
91
92 if (!s)
93 return 0;
94
95 l = u;
96 if (l > UNICODE_MAX || (l & SURROGATE_MASK) == SURROGATE_PAIR)
97 return -1;
98
99 nc = 0;
100 for (t = utf8_table; t->cmask && maxlen; t++, maxlen--) {
101 nc++;
102 if (l <= t->lmask) {
103 c = t->shift;
104 *s = (u8) (t->cval | (l >> c));
105 while (c > 0) {
106 c -= 6;
107 s++;
108 *s = (u8) (0x80 | ((l >> c) & 0x3F));
109 }
110 return nc;
111 }
112 }
113 return -1;
114}
115EXPORT_SYMBOL(utf32_to_utf8);
116
117int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs)
118{
119 u16 *op;
120 int size;
121 unicode_t u;
122
123 op = pwcs;
124 while (*s && len > 0) {
125 if (*s & 0x80) {
126 size = utf8_to_utf32(s, len, &u);
127 if (size < 0)
128 return -EINVAL;
129
130 if (u >= PLANE_SIZE) {
131 u -= PLANE_SIZE;
132 *op++ = (wchar_t) (SURROGATE_PAIR |
133 ((u >> 10) & SURROGATE_BITS));
134 *op++ = (wchar_t) (SURROGATE_PAIR |
135 SURROGATE_LOW |
136 (u & SURROGATE_BITS));
137 } else {
138 *op++ = (wchar_t) u;
139 }
140 s += size;
141 len -= size;
142 } else {
143 *op++ = *s++;
144 len--;
145 }
146 }
147 return op - pwcs;
148}
149EXPORT_SYMBOL(utf8s_to_utf16s);
150
151static inline unsigned long get_utf16(unsigned c, enum utf16_endian endian)
152{
153 switch (endian) {
154 default:
155 return c;
156 case UTF16_LITTLE_ENDIAN:
157 return __le16_to_cpu(c);
158 case UTF16_BIG_ENDIAN:
159 return __be16_to_cpu(c);
160 }
161}
162
/*
 * utf16s_to_utf8s - convert a UTF-16 string to UTF-8
 * @pwcs:   input 16-bit units
 * @len:    number of units available at @pwcs
 * @endian: byte order of the input units (see get_utf16())
 * @s:      output buffer
 * @maxlen: size of @s in bytes
 *
 * Conversion stops at the first zero unit, when @len units have been
 * consumed, or when @maxlen reaches zero.  No terminator is written.
 * Unpaired surrogates and characters that utf32_to_utf8() refuses
 * (including ones that no longer fit in the remaining space) are
 * silently dropped.
 *
 * Returns the number of bytes written to @s.
 */
int utf16s_to_utf8s(const wchar_t *pwcs, int len, enum utf16_endian endian,
		u8 *s, int maxlen)
{
	u8 *op;
	int size;
	unsigned long u, v;

	op = s;
	while (len > 0 && maxlen > 0) {
		u = get_utf16(*pwcs, endian);
		if (!u)
			break;
		pwcs++;
		len--;
		if (u > 0x7f) {
			if ((u & SURROGATE_MASK) == SURROGATE_PAIR) {
				if (u & SURROGATE_LOW) {
					/* Low surrogate with no preceding high one: ignore it and move on */
					continue;
				}
				/* High surrogate at the very end of the input: nothing to pair it with */
				if (len <= 0)
					break;
				v = get_utf16(*pwcs, endian);
				if ((v & SURROGATE_MASK) != SURROGATE_PAIR ||
						!(v & SURROGATE_LOW)) {
					/*
					 * High surrogate not followed by a low one: drop it.
					 * pwcs was not advanced past v, so v is handled
					 * as an ordinary unit on the next iteration.
					 */
					continue;
				}
				/* Combine the pair into a single code point and consume v */
				u = PLANE_SIZE + ((u & SURROGATE_BITS) << 10)
						+ (v & SURROGATE_BITS);
				pwcs++;
				len--;
			}
			size = utf32_to_utf8(u, op, maxlen);
			if (size == -1) {
				/* Could not be encoded: ignore character and move on */
			} else {
				op += size;
				maxlen -= size;
			}
		} else {
			/* ASCII: one byte out for one unit in */
			*op++ = (u8) u;
			maxlen--;
		}
	}
	return op - s;
}
210EXPORT_SYMBOL(utf16s_to_utf8s);
211
212int register_nls(struct nls_table * nls)
213{
214 struct nls_table ** tmp = &tables;
215
216 if (nls->next)
217 return -EBUSY;
218
219 spin_lock(&nls_lock);
220 while (*tmp) {
221 if (nls == *tmp) {
222 spin_unlock(&nls_lock);
223 return -EBUSY;
224 }
225 tmp = &(*tmp)->next;
226 }
227 nls->next = tables;
228 tables = nls;
229 spin_unlock(&nls_lock);
230 return 0;
231}
232
233int unregister_nls(struct nls_table * nls)
234{
235 struct nls_table ** tmp = &tables;
236
237 spin_lock(&nls_lock);
238 while (*tmp) {
239 if (nls == *tmp) {
240 *tmp = nls->next;
241 spin_unlock(&nls_lock);
242 return 0;
243 }
244 tmp = &(*tmp)->next;
245 }
246 spin_unlock(&nls_lock);
247 return -EINVAL;
248}
249
250static struct nls_table *find_nls(char *charset)
251{
252 struct nls_table *nls;
253 spin_lock(&nls_lock);
254 for (nls = tables; nls; nls = nls->next) {
255 if (!strcmp(nls->charset, charset))
256 break;
257 if (nls->alias && !strcmp(nls->alias, charset))
258 break;
259 }
260 if (nls && !try_module_get(nls->owner))
261 nls = NULL;
262 spin_unlock(&nls_lock);
263 return nls;
264}
265
/*
 * load_nls - get the NLS table for @charset
 *
 * Looks the table up with find_nls(); the lookup expression and the
 * module name pattern "nls_%s" are handed to try_then_request_module().
 * NOTE(review): presumably the macro requests the module and repeats
 * the lookup when the first attempt finds nothing -- the exact retry
 * rule lives in <linux/kmod.h>, confirm there.
 *
 * A non-NULL result carries the module reference taken by find_nls();
 * release it with unload_nls().
 */
struct nls_table *load_nls(char *charset)
{
	return try_then_request_module(find_nls(charset), "nls_%s", charset);
}
270
271void unload_nls(struct nls_table *nls)
272{
273 if (nls)
274 module_put(nls->owner);
275}
276
/*
 * Byte -> Unicode map of the built-in default table.  Every byte value
 * maps to the code point with the same numeric value (identity map).
 */
static const wchar_t charset2uni[256] = {
	/* 0x00 */
	0x0000, 0x0001, 0x0002, 0x0003,
	0x0004, 0x0005, 0x0006, 0x0007,
	0x0008, 0x0009, 0x000a, 0x000b,
	0x000c, 0x000d, 0x000e, 0x000f,
	/* 0x10 */
	0x0010, 0x0011, 0x0012, 0x0013,
	0x0014, 0x0015, 0x0016, 0x0017,
	0x0018, 0x0019, 0x001a, 0x001b,
	0x001c, 0x001d, 0x001e, 0x001f,
	/* 0x20 */
	0x0020, 0x0021, 0x0022, 0x0023,
	0x0024, 0x0025, 0x0026, 0x0027,
	0x0028, 0x0029, 0x002a, 0x002b,
	0x002c, 0x002d, 0x002e, 0x002f,
	/* 0x30 */
	0x0030, 0x0031, 0x0032, 0x0033,
	0x0034, 0x0035, 0x0036, 0x0037,
	0x0038, 0x0039, 0x003a, 0x003b,
	0x003c, 0x003d, 0x003e, 0x003f,
	/* 0x40 */
	0x0040, 0x0041, 0x0042, 0x0043,
	0x0044, 0x0045, 0x0046, 0x0047,
	0x0048, 0x0049, 0x004a, 0x004b,
	0x004c, 0x004d, 0x004e, 0x004f,
	/* 0x50 */
	0x0050, 0x0051, 0x0052, 0x0053,
	0x0054, 0x0055, 0x0056, 0x0057,
	0x0058, 0x0059, 0x005a, 0x005b,
	0x005c, 0x005d, 0x005e, 0x005f,
	/* 0x60 */
	0x0060, 0x0061, 0x0062, 0x0063,
	0x0064, 0x0065, 0x0066, 0x0067,
	0x0068, 0x0069, 0x006a, 0x006b,
	0x006c, 0x006d, 0x006e, 0x006f,
	/* 0x70 */
	0x0070, 0x0071, 0x0072, 0x0073,
	0x0074, 0x0075, 0x0076, 0x0077,
	0x0078, 0x0079, 0x007a, 0x007b,
	0x007c, 0x007d, 0x007e, 0x007f,
	/* 0x80 */
	0x0080, 0x0081, 0x0082, 0x0083,
	0x0084, 0x0085, 0x0086, 0x0087,
	0x0088, 0x0089, 0x008a, 0x008b,
	0x008c, 0x008d, 0x008e, 0x008f,
	/* 0x90 */
	0x0090, 0x0091, 0x0092, 0x0093,
	0x0094, 0x0095, 0x0096, 0x0097,
	0x0098, 0x0099, 0x009a, 0x009b,
	0x009c, 0x009d, 0x009e, 0x009f,
	/* 0xa0 */
	0x00a0, 0x00a1, 0x00a2, 0x00a3,
	0x00a4, 0x00a5, 0x00a6, 0x00a7,
	0x00a8, 0x00a9, 0x00aa, 0x00ab,
	0x00ac, 0x00ad, 0x00ae, 0x00af,
	/* 0xb0 */
	0x00b0, 0x00b1, 0x00b2, 0x00b3,
	0x00b4, 0x00b5, 0x00b6, 0x00b7,
	0x00b8, 0x00b9, 0x00ba, 0x00bb,
	0x00bc, 0x00bd, 0x00be, 0x00bf,
	/* 0xc0 */
	0x00c0, 0x00c1, 0x00c2, 0x00c3,
	0x00c4, 0x00c5, 0x00c6, 0x00c7,
	0x00c8, 0x00c9, 0x00ca, 0x00cb,
	0x00cc, 0x00cd, 0x00ce, 0x00cf,
	/* 0xd0 */
	0x00d0, 0x00d1, 0x00d2, 0x00d3,
	0x00d4, 0x00d5, 0x00d6, 0x00d7,
	0x00d8, 0x00d9, 0x00da, 0x00db,
	0x00dc, 0x00dd, 0x00de, 0x00df,
	/* 0xe0 */
	0x00e0, 0x00e1, 0x00e2, 0x00e3,
	0x00e4, 0x00e5, 0x00e6, 0x00e7,
	0x00e8, 0x00e9, 0x00ea, 0x00eb,
	0x00ec, 0x00ed, 0x00ee, 0x00ef,
	/* 0xf0 */
	0x00f0, 0x00f1, 0x00f2, 0x00f3,
	0x00f4, 0x00f5, 0x00f6, 0x00f7,
	0x00f8, 0x00f9, 0x00fa, 0x00fb,
	0x00fc, 0x00fd, 0x00fe, 0x00ff,
};
359
/*
 * Unicode -> byte map for code points whose high byte is 0x00 (indexed
 * by the low byte).  Identity map.  Entry 0 is 0x00, which uni2char()
 * treats as "no mapping".
 */
static const unsigned char page00[256] = {
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */
	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */
	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */
	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */
	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */
	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */
	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */
	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */
	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */
	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */
	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */

	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */
	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */
	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */
	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */
	0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
	0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
	0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
	0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
	0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
	0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */
	0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */
	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */
	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */
	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */
	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */
};
395
/*
 * Per-page lookup used by uni2char(), indexed by the high byte of the
 * code point.  Only page 0x00 is populated; the remaining 255 entries
 * are implicitly NULL, which uni2char() reports as -EINVAL.
 */
static const unsigned char *const page_uni2charset[256] = {
	page00
};
399
/*
 * Lower-casing map of the default table.  Only 0x41-0x5a ('A'-'Z') are
 * changed, to 0x61-0x7a ('a'-'z'); every other byte maps to itself.
 */
static const unsigned char charset2lower[256] = {
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */
	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */
	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */
	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */
	0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x40-0x47 */
	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x48-0x4f */
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x50-0x57 */
	0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */
	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */
	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */
	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */

	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */
	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */
	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */
	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */
	0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
	0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
	0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
	0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
	0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
	0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */
	0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */
	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */
	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */
	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */
	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */
};
435
/*
 * Upper-casing map of the default table.  Only 0x61-0x7a ('a'-'z') are
 * changed, to 0x41-0x5a ('A'-'Z'); every other byte maps to itself.
 */
static const unsigned char charset2upper[256] = {
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */
	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */
	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */
	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */
	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */
	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */
	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */
	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */
	0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x60-0x67 */
	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x68-0x6f */
	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x70-0x77 */
	0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */

	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */
	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */
	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */
	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */
	0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
	0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
	0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
	0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
	0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
	0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */
	0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */
	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */
	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */
	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */
	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */
};
471
472
473static int uni2char(wchar_t uni, unsigned char *out, int boundlen)
474{
475 const unsigned char *uni2charset;
476 unsigned char cl = uni & 0x00ff;
477 unsigned char ch = (uni & 0xff00) >> 8;
478
479 if (boundlen <= 0)
480 return -ENAMETOOLONG;
481
482 uni2charset = page_uni2charset[ch];
483 if (uni2charset && uni2charset[cl])
484 out[0] = uni2charset[cl];
485 else
486 return -EINVAL;
487 return 1;
488}
489
490static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni)
491{
492 *uni = charset2uni[*rawstring];
493 if (*uni == 0x0000)
494 return -EINVAL;
495 return 1;
496}
497
/*
 * Built-in table named "default".  It is the initial element of the
 * `tables` list and the fallback returned by load_nls_default().
 * Members not named here are zero-initialized.
 */
static struct nls_table default_table = {
	.charset	= "default",
	.uni2char	= uni2char,
	.char2uni	= char2uni,
	.charset2lower	= charset2lower,
	.charset2upper	= charset2upper,
};
505
506
507struct nls_table *load_nls_default(void)
508{
509 struct nls_table *default_nls;
510
511 default_nls = load_nls(CONFIG_NLS_DEFAULT);
512 if (default_nls != NULL)
513 return default_nls;
514 else
515 return &default_table;
516}
517
518EXPORT_SYMBOL(register_nls);
519EXPORT_SYMBOL(unregister_nls);
520EXPORT_SYMBOL(unload_nls);
521EXPORT_SYMBOL(load_nls);
522EXPORT_SYMBOL(load_nls_default);
523
524MODULE_LICENSE("Dual BSD/GPL");
525