uboot/include/linux/utf.h
<<
>>
Prefs
   1#ifndef _LINUX_UTF_H
   2#define _LINUX_UTF_H
   3
   4#include <asm/unaligned.h>
   5
   6static inline int utf8_to_utf16le(const char *s, __le16 *cp, unsigned len)
   7{
   8        int     count = 0;
   9        u8      c;
  10        u16     uchar;
  11
  12        /*
  13         * this insists on correct encodings, though not minimal ones.
  14         * BUT it currently rejects legit 4-byte UTF-8 code points,
  15         * which need surrogate pairs.  (Unicode 3.1 can use them.)
  16         */
  17        while (len != 0 && (c = (u8) *s++) != 0) {
  18                if ((c & 0x80)) {
  19                        /*
  20                         * 2-byte sequence:
  21                         * 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx
  22                         */
  23                        if ((c & 0xe0) == 0xc0) {
  24                                uchar = (c & 0x1f) << 6;
  25
  26                                c = (u8) *s++;
  27                                if ((c & 0xc0) != 0x80)
  28                                        goto fail;
  29                                c &= 0x3f;
  30                                uchar |= c;
  31
  32                        /*
  33                         * 3-byte sequence (most CJKV characters):
  34                         * zzzzyyyyyyxxxxxx = 1110zzzz 10yyyyyy 10xxxxxx
  35                         */
  36                        } else if ((c & 0xf0) == 0xe0) {
  37                                uchar = (c & 0x0f) << 12;
  38
  39                                c = (u8) *s++;
  40                                if ((c & 0xc0) != 0x80)
  41                                        goto fail;
  42                                c &= 0x3f;
  43                                uchar |= c << 6;
  44
  45                                c = (u8) *s++;
  46                                if ((c & 0xc0) != 0x80)
  47                                        goto fail;
  48                                c &= 0x3f;
  49                                uchar |= c;
  50
  51                                /* no bogus surrogates */
  52                                if (0xd800 <= uchar && uchar <= 0xdfff)
  53                                        goto fail;
  54
  55                        /*
  56                         * 4-byte sequence (surrogate pairs, currently rare):
  57                         * 11101110wwwwzzzzyy + 110111yyyyxxxxxx
  58                         *     = 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
  59                         * (uuuuu = wwww + 1)
  60                         * FIXME accept the surrogate code points (only)
  61                         */
  62                        } else
  63                                goto fail;
  64                } else
  65                        uchar = c;
  66                put_unaligned_le16(uchar, cp++);
  67                count++;
  68                len--;
  69        }
  70        return count;
  71fail:
  72        return -1;
  73}
  74
  75#endif /* _LINUX_UTF_H */
  76