1
2
3
4
5
6
7
8
9#include "libbb.h"
10#include "unicode.h"
11
12
/*
 * Current Unicode mode. Code in this file sets it to UNICODE_ON or
 * UNICODE_OFF and compares it against UNICODE_UNKNOWN / UNICODE_ON.
 * The variable is only defined here when unicode_status is not already a
 * macro (presumably a build where the mode is fixed at compile time --
 * TODO confirm in unicode.h).
 */
13#ifndef unicode_status
14uint8_t unicode_status;
15#endif
16
17
18
19
20
21
22#if ENABLE_UNICODE_USING_LOCALE
23
24
25
26void FAST_FUNC reinit_unicode(const char *LANG)
27{
28 static const char unicode_0x394[] ALIGN1 = { 0xce, 0x94, 0 };
29 size_t width;
30
31
32
33
34
35
36
37
38
39
40
41
42
43 setlocale(LC_CTYPE, LANG ? LANG : "");
44
45
46 width = unicode_strlen(unicode_0x394);
47 unicode_status = (width == 1 ? UNICODE_ON : UNICODE_OFF);
48}
49
50void FAST_FUNC init_unicode(void)
51{
52
53
54
55
56 if (unicode_status == UNICODE_UNKNOWN) {
57 char *s = getenv("LC_ALL");
58 if (!s) s = getenv("LC_CTYPE");
59 if (!s) s = getenv("LANG");
60 reinit_unicode(s);
61 }
62}
63
64#else
65
66
67
68# if ENABLE_FEATURE_CHECK_UNICODE_IN_ENV
69void FAST_FUNC reinit_unicode(const char *LANG)
70{
71 unicode_status = UNICODE_OFF;
72 if (!LANG || !(strstr(LANG, ".utf") || strstr(LANG, ".UTF")))
73 return;
74 unicode_status = UNICODE_ON;
75}
76
77void FAST_FUNC init_unicode(void)
78{
79 if (unicode_status == UNICODE_UNKNOWN) {
80 char *s = getenv("LC_ALL");
81 if (!s) s = getenv("LC_CTYPE");
82 if (!s) s = getenv("LANG");
83 reinit_unicode(s);
84 }
85}
86# endif
87
/*
 * Encode wc as a UTF-8 style byte sequence into s (no terminator added).
 * Values up to 0x7f take one byte; larger values take 2..6 bytes.
 * Returns the number of bytes written; s must have room for 6.
 */
static size_t wcrtomb_internal(char *s, wchar_t wc)
{
	uint32_t probe = wc;
	int len;
	int pos;

	/* Plain ASCII is stored unchanged */
	if (probe <= 0x7f) {
		*s = probe;
		return 1;
	}

	/*
	 * Two bytes cover values below 0x800; each additional byte extends
	 * the range by 5 bits. The length is capped at 6 bytes.
	 */
	for (len = 2; probe >= 0x800 && len < 6; len++)
		probe >>= 5;

	/* Continuation bytes (10xxxxxx) are filled from last to second */
	for (pos = len - 1; pos > 0; pos--) {
		s[pos] = (wc & 0x3f) | 0x80;
		wc >>= 6;
	}

	/* Lead byte: 'len' high one-bits followed by the remaining payload */
	s[0] = wc | (uint8_t)(0x3f00 >> len);
	return len;
}
124size_t FAST_FUNC wcrtomb(char *s, wchar_t wc, mbstate_t *ps UNUSED_PARAM)
125{
126 if (unicode_status != UNICODE_ON) {
127 *s = wc;
128 return 1;
129 }
130
131 return wcrtomb_internal(s, wc);
132}
133size_t FAST_FUNC wcstombs(char *dest, const wchar_t *src, size_t n)
134{
135 size_t org_n = n;
136
137 if (unicode_status != UNICODE_ON) {
138 while (n) {
139 wchar_t c = *src++;
140 *dest++ = c;
141 if (c == 0)
142 break;
143 n--;
144 }
145 return org_n - n;
146 }
147
148 while (n >= MB_CUR_MAX) {
149 wchar_t wc = *src++;
150 size_t len = wcrtomb_internal(dest, wc);
151
152 if (wc == L'\0')
153 return org_n - n;
154 dest += len;
155 n -= len;
156 }
157 while (n) {
158 char tbuf[MB_CUR_MAX];
159 wchar_t wc = *src++;
160 size_t len = wcrtomb_internal(tbuf, wc);
161
162 if (len > n)
163 break;
164 memcpy(dest, tbuf, len);
165 if (wc == L'\0')
166 return org_n - n;
167 dest += len;
168 n -= len;
169 }
170 return org_n - n;
171}
172
/* Marker stored in the result when a byte sequence cannot be decoded */
# define ERROR_WCHAR (~(wchar_t)0)

/*
 * Decode one UTF-8 style sequence (1..6 bytes) starting at src.
 *
 * On success *res receives the decoded value; on failure it receives
 * ERROR_WCHAR. Failures are: a stray continuation byte in lead position,
 * a missing continuation byte, and an overlong encoding (a value encoded
 * with more bytes than it needs, e.g. C0 80 or F0 80 84 80).
 *
 * Returns a pointer just past the bytes that were consumed. After a
 * missing-continuation error this is the offending byte itself, so the
 * caller resumes decoding there.
 */
static const char *mbstowc_internal(wchar_t *res, const char *src)
{
	/* Smallest value that legitimately needs a sequence of N bytes */
	static const uint32_t min_for_len[7] = {
		0, 0, 0x80, 0x800, 0x10000, 0x200000, 0x4000000
	};
	int bytes;
	int remaining;
	unsigned c = (unsigned char) *src++;

	if (c <= 0x7f) {
		*res = c;
		return src;
	}

	/* Count the leading one-bits of the lead byte: that is the length */
	bytes = 0;
	do {
		c <<= 1;
		bytes++;
	} while ((c & 0x80) && bytes < 6);
	if (bytes == 1) {
		/* 10xxxxxx: a continuation byte where a lead byte is required */
		*res = ERROR_WCHAR;
		return src;
	}
	/* Keep only the payload bits of the lead byte */
	c = (uint8_t)(c) >> bytes;

	for (remaining = bytes - 1; remaining != 0; remaining--) {
		unsigned ch = (unsigned char) *src;
		if ((ch & 0xc0) != 0x80) {
			/* Sequence is cut short; do not consume the bad byte */
			*res = ERROR_WCHAR;
			return src;
		}
		c = (c << 6) + (ch & 0x3f);
		src++;
	}

	/*
	 * Reject overlong forms: accepting them lets the same character be
	 * spelled several ways (including an embedded NUL as C0 80).
	 */
	if (c < min_for_len[bytes]) {
		*res = ERROR_WCHAR;
		return src;
	}

	*res = c;
	return src;
}
226size_t FAST_FUNC mbstowcs(wchar_t *dest, const char *src, size_t n)
227{
228 size_t org_n = n;
229
230 if (unicode_status != UNICODE_ON) {
231 while (n) {
232 unsigned char c = *src++;
233
234 if (dest)
235 *dest++ = c;
236 if (c == 0)
237 break;
238 n--;
239 }
240 return org_n - n;
241 }
242
243 while (n) {
244 wchar_t wc;
245 src = mbstowc_internal(&wc, src);
246 if (wc == ERROR_WCHAR)
247 return (size_t) -1L;
248 if (dest)
249 *dest++ = wc;
250 if (wc == 0)
251 break;
252 n--;
253 }
254
255 return org_n - n;
256}
257
258int FAST_FUNC iswspace(wint_t wc)
259{
260 return (unsigned)wc <= 0x7f && isspace(wc);
261}
262
263int FAST_FUNC iswalnum(wint_t wc)
264{
265 return (unsigned)wc <= 0x7f && isalnum(wc);
266}
267
268int FAST_FUNC iswpunct(wint_t wc)
269{
270 return (unsigned)wc <= 0x7f && ispunct(wc);
271}
272
273
274# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x300
/* Inclusive range of 16-bit code points */
struct interval {
	uint16_t first;
	uint16_t last;
};

/*
 * Binary search for ucs in a table of non-decreasing inclusive ranges.
 * 'max' is the index of the LAST entry (not the entry count).
 * Returns 1 if some range contains ucs, 0 otherwise.
 */
static int in_interval_table(unsigned ucs, const struct interval *table, unsigned max)
{
	unsigned lo = 0;
	unsigned hi = max;

	/* Outside the span of the whole table: nothing to search */
	if (ucs < table[0].first || ucs > table[hi].last)
		return 0;

	while (lo <= hi) {
		unsigned probe = (lo + hi) / 2;
		const struct interval *range = &table[probe];

		if (ucs > range->last)
			lo = probe + 1;
		else if (ucs < range->first)
			hi = probe - 1;
		else
			return 1;
	}
	return 0;
}
301
/*
 * Binary search for ucs in a table of packed ranges. Each 16-bit entry
 * holds a range start in its upper 14 bits and (last - first), 0..3,
 * in its low 2 bits. 'max' is the index of the LAST entry.
 * Returns 1 if some range contains ucs, 0 otherwise.
 */
static int in_uint16_table(unsigned ucs, const uint16_t *table, unsigned max)
{
	unsigned lo = 0;
	unsigned hi = max;
	unsigned start, end;

	/* Reject values before the first range or after the last one */
	start = table[0] >> 2;
	if (ucs < start)
		return 0;
	end = (table[hi] >> 2) + (table[hi] & 3);
	if (ucs > end)
		return 0;

	while (lo <= hi) {
		unsigned probe = (lo + hi) / 2;

		start = table[probe] >> 2;
		end = start + (table[probe] & 3);
		if (ucs > end)
			lo = probe + 1;
		else if (ucs < start)
			hi = probe - 1;
		else
			return 1;
	}
	return 0;
}
329# endif
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454int FAST_FUNC wcwidth(unsigned ucs)
455{
456# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x300
457
458
459# define BIG_(a,b) { a, b },
460# define PAIR(a,b)
461# define ARRAY \
462 BIG_(0x0300, 0x036F) \
463 PAIR(0x0483, 0x0486) \
464 PAIR(0x0488, 0x0489) \
465 BIG_(0x0591, 0x05BD) \
466 PAIR(0x05BF, 0x05BF) \
467 PAIR(0x05C1, 0x05C2) \
468 PAIR(0x05C4, 0x05C5) \
469 PAIR(0x05C7, 0x05C7) \
470 PAIR(0x0600, 0x0603) \
471 BIG_(0x0610, 0x0615) \
472 BIG_(0x064B, 0x065E) \
473 PAIR(0x0670, 0x0670) \
474 BIG_(0x06D6, 0x06E4) \
475 PAIR(0x06E7, 0x06E8) \
476 PAIR(0x06EA, 0x06ED) \
477 PAIR(0x070F, 0x070F) \
478 PAIR(0x0711, 0x0711) \
479 BIG_(0x0730, 0x074A) \
480 BIG_(0x07A6, 0x07B0) \
481 BIG_(0x07EB, 0x07F3) \
482 PAIR(0x0901, 0x0902) \
483 PAIR(0x093C, 0x093C) \
484 BIG_(0x0941, 0x0948) \
485 PAIR(0x094D, 0x094D) \
486 PAIR(0x0951, 0x0954) \
487 PAIR(0x0962, 0x0963) \
488 PAIR(0x0981, 0x0981) \
489 PAIR(0x09BC, 0x09BC) \
490 PAIR(0x09C1, 0x09C4) \
491 PAIR(0x09CD, 0x09CD) \
492 PAIR(0x09E2, 0x09E3) \
493 PAIR(0x0A01, 0x0A02) \
494 PAIR(0x0A3C, 0x0A3C) \
495 PAIR(0x0A41, 0x0A42) \
496 PAIR(0x0A47, 0x0A48) \
497 PAIR(0x0A4B, 0x0A4D) \
498 PAIR(0x0A70, 0x0A71) \
499 PAIR(0x0A81, 0x0A82) \
500 PAIR(0x0ABC, 0x0ABC) \
501 BIG_(0x0AC1, 0x0AC5) \
502 PAIR(0x0AC7, 0x0AC8) \
503 PAIR(0x0ACD, 0x0ACD) \
504 PAIR(0x0AE2, 0x0AE3) \
505 PAIR(0x0B01, 0x0B01) \
506 PAIR(0x0B3C, 0x0B3C) \
507 PAIR(0x0B3F, 0x0B3F) \
508 PAIR(0x0B41, 0x0B43) \
509 PAIR(0x0B4D, 0x0B4D) \
510 PAIR(0x0B56, 0x0B56) \
511 PAIR(0x0B82, 0x0B82) \
512 PAIR(0x0BC0, 0x0BC0) \
513 PAIR(0x0BCD, 0x0BCD) \
514 PAIR(0x0C3E, 0x0C40) \
515 PAIR(0x0C46, 0x0C48) \
516 PAIR(0x0C4A, 0x0C4D) \
517 PAIR(0x0C55, 0x0C56) \
518 PAIR(0x0CBC, 0x0CBC) \
519 PAIR(0x0CBF, 0x0CBF) \
520 PAIR(0x0CC6, 0x0CC6) \
521 PAIR(0x0CCC, 0x0CCD) \
522 PAIR(0x0CE2, 0x0CE3) \
523 PAIR(0x0D41, 0x0D43) \
524 PAIR(0x0D4D, 0x0D4D) \
525 PAIR(0x0DCA, 0x0DCA) \
526 PAIR(0x0DD2, 0x0DD4) \
527 PAIR(0x0DD6, 0x0DD6) \
528 PAIR(0x0E31, 0x0E31) \
529 BIG_(0x0E34, 0x0E3A) \
530 BIG_(0x0E47, 0x0E4E) \
531 PAIR(0x0EB1, 0x0EB1) \
532 BIG_(0x0EB4, 0x0EB9) \
533 PAIR(0x0EBB, 0x0EBC) \
534 BIG_(0x0EC8, 0x0ECD) \
535 PAIR(0x0F18, 0x0F19) \
536 PAIR(0x0F35, 0x0F35) \
537 PAIR(0x0F37, 0x0F37) \
538 PAIR(0x0F39, 0x0F39) \
539 BIG_(0x0F71, 0x0F7E) \
540 BIG_(0x0F80, 0x0F84) \
541 PAIR(0x0F86, 0x0F87) \
542 PAIR(0x0FC6, 0x0FC6) \
543 BIG_(0x0F90, 0x0F97) \
544 BIG_(0x0F99, 0x0FBC) \
545 PAIR(0x102D, 0x1030) \
546 PAIR(0x1032, 0x1032) \
547 PAIR(0x1036, 0x1037) \
548 PAIR(0x1039, 0x1039) \
549 PAIR(0x1058, 0x1059) \
550 BIG_(0x1160, 0x11FF) \
551 PAIR(0x135F, 0x135F) \
552 PAIR(0x1712, 0x1714) \
553 PAIR(0x1732, 0x1734) \
554 PAIR(0x1752, 0x1753) \
555 PAIR(0x1772, 0x1773) \
556 PAIR(0x17B4, 0x17B5) \
557 BIG_(0x17B7, 0x17BD) \
558 PAIR(0x17C6, 0x17C6) \
559 BIG_(0x17C9, 0x17D3) \
560 PAIR(0x17DD, 0x17DD) \
561 PAIR(0x180B, 0x180D) \
562 PAIR(0x18A9, 0x18A9) \
563 PAIR(0x1920, 0x1922) \
564 PAIR(0x1927, 0x1928) \
565 PAIR(0x1932, 0x1932) \
566 PAIR(0x1939, 0x193B) \
567 PAIR(0x1A17, 0x1A18) \
568 PAIR(0x1B00, 0x1B03) \
569 PAIR(0x1B34, 0x1B34) \
570 BIG_(0x1B36, 0x1B3A) \
571 PAIR(0x1B3C, 0x1B3C) \
572 PAIR(0x1B42, 0x1B42) \
573 BIG_(0x1B6B, 0x1B73) \
574 BIG_(0x1DC0, 0x1DCA) \
575 PAIR(0x1DFE, 0x1DFF) \
576 BIG_(0x200B, 0x200F) \
577 BIG_(0x202A, 0x202E) \
578 PAIR(0x2060, 0x2063) \
579 BIG_(0x206A, 0x206F) \
580 BIG_(0x20D0, 0x20EF) \
581 BIG_(0x302A, 0x302F) \
582 PAIR(0x3099, 0x309A) \
583 \
584 BIG_(0xA806, 0xA806) \
585 BIG_(0xA80B, 0xA80B) \
586 BIG_(0xA825, 0xA826) \
587 BIG_(0xFB1E, 0xFB1E) \
588 BIG_(0xFE00, 0xFE0F) \
589 BIG_(0xFE20, 0xFE23) \
590 BIG_(0xFEFF, 0xFEFF) \
591 BIG_(0xFFF9, 0xFFFB)
592 static const struct interval combining[] ALIGN4 = { ARRAY };
593# undef BIG_
594# undef PAIR
595# define BIG_(a,b)
596# define PAIR(a,b) (a << 2) | (b-a),
597 static const uint16_t combining1[] ALIGN2 = { ARRAY };
598# undef BIG_
599# undef PAIR
600# define BIG_(a,b) char big_##a[b < 0x4000 && b-a <= 3 ? -1 : 1];
601# define PAIR(a,b) char pair##a[b >= 0x4000 || b-a > 3 ? -1 : 1];
602 struct CHECK { ARRAY };
603# undef BIG_
604# undef PAIR
605# undef ARRAY
606# endif
607
608 if (ucs == 0)
609 return 0;
610
611
612 if ((ucs & ~0x80) < 0x20 || ucs == 0x7f)
613 return -1;
614
615 if (ucs > CONFIG_LAST_SUPPORTED_WCHAR)
616 return -1;
617
618
619 if (CONFIG_LAST_SUPPORTED_WCHAR < 0x300 || ucs < 0x300)
620 return 1;
621
622# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x300
623
624 if (in_interval_table(ucs, combining, ARRAY_SIZE(combining) - 1))
625 return 0;
626 if (in_uint16_table(ucs, combining1, ARRAY_SIZE(combining1) - 1))
627 return 0;
628
629
630 if (CONFIG_LAST_SUPPORTED_WCHAR < 0x1100 || ucs < 0x1100)
631 return 1;
632
633# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x1100
634
635
636
637
638 if ((CONFIG_LAST_SUPPORTED_WCHAR >= 0xd800 && ucs >= 0xd800 && ucs <= 0xf8ff)
639 || (CONFIG_LAST_SUPPORTED_WCHAR >= 0xfdd0 && ucs >= 0xfdd0 && ucs <= 0xfdef)
640 ) {
641 return -1;
642 }
643
644 if (CONFIG_LAST_SUPPORTED_WCHAR >= 0xfffe && ((ucs & 0xfffe) == 0xfffe)) {
645 return -1;
646 }
647
648# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x10000
649 if (ucs >= 0x10000) {
650
651 static const struct interval combining0x10000[] ALIGN4 = {
652 { 0x0A01, 0x0A03 }, { 0x0A05, 0x0A06 }, { 0x0A0C, 0x0A0F },
653 { 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 },
654 { 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD },
655 { 0xD242, 0xD244 }
656 };
657
658 if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1))
659 return 0;
660
661 if (CONFIG_LAST_SUPPORTED_WCHAR >= 0xE0001
662 && ( ucs == 0xE0001
663 || (ucs >= 0xE0020 && ucs <= 0xE007F)
664 || (ucs >= 0xE0100 && ucs <= 0xE01EF)
665 )
666 ) {
667 return 0;
668 }
669 }
670# endif
671
672
673
674
675 return 1 +
676 ( ( ucs <= 0x115f)
677 || ucs == 0x2329
678 || ucs == 0x232a
679# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x2e80
680 || (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f)
681# endif
682# if CONFIG_LAST_SUPPORTED_WCHAR >= 0xac00
683 || (ucs >= 0xac00 && ucs <= 0xd7a3)
684# endif
685# if CONFIG_LAST_SUPPORTED_WCHAR >= 0xf900
686 || (ucs >= 0xf900 && ucs <= 0xfaff)
687 || (ucs >= 0xfe10 && ucs <= 0xfe19)
688 || (ucs >= 0xfe30 && ucs <= 0xfe6f)
689 || (ucs >= 0xff00 && ucs <= 0xff60)
690 || (ucs >= 0xffe0 && ucs <= 0xffe6)
691# endif
692# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x20000
693 || ((ucs >> 17) == (2 >> 1))
694# endif
695 );
696# endif
697# endif
698}
699
700
701# if ENABLE_UNICODE_BIDI_SUPPORT
702int FAST_FUNC unicode_bidi_isrtl(wint_t wc)
703{
704
705
706
707
708# define BIG_(a,b) { a, b },
709# define PAIR(a,b)
710# define ARRAY \
711 PAIR(0x0590, 0x0590) \
712 PAIR(0x05BE, 0x05BE) \
713 PAIR(0x05C0, 0x05C0) \
714 PAIR(0x05C3, 0x05C3) \
715 PAIR(0x05C6, 0x05C6) \
716 BIG_(0x05C8, 0x05FF) \
717 PAIR(0x0604, 0x0605) \
718 PAIR(0x0608, 0x0608) \
719 PAIR(0x060B, 0x060B) \
720 PAIR(0x060D, 0x060D) \
721 BIG_(0x061B, 0x064A) \
722 PAIR(0x065F, 0x065F) \
723 PAIR(0x066D, 0x066F) \
724 BIG_(0x0671, 0x06D5) \
725 PAIR(0x06E5, 0x06E6) \
726 PAIR(0x06EE, 0x06EF) \
727 BIG_(0x06FA, 0x070E) \
728 PAIR(0x0710, 0x0710) \
729 BIG_(0x0712, 0x072F) \
730 BIG_(0x074B, 0x07A5) \
731 BIG_(0x07B1, 0x07EA) \
732 PAIR(0x07F4, 0x07F5) \
733 BIG_(0x07FA, 0x0815) \
734 PAIR(0x081A, 0x081A) \
735 PAIR(0x0824, 0x0824) \
736 PAIR(0x0828, 0x0828) \
737 BIG_(0x082E, 0x08FF) \
738 PAIR(0x200F, 0x200F) \
739 PAIR(0x202B, 0x202B) \
740 PAIR(0x202E, 0x202E) \
741 BIG_(0xFB1D, 0xFB1D) \
742 BIG_(0xFB1F, 0xFB28) \
743 BIG_(0xFB2A, 0xFD3D) \
744 BIG_(0xFD40, 0xFDCF) \
745 BIG_(0xFDC8, 0xFDCF) \
746 BIG_(0xFDF0, 0xFDFC) \
747 BIG_(0xFDFE, 0xFDFF) \
748 BIG_(0xFE70, 0xFEFE)
749
750
751
752
753
754
755
756
757
758
759
760
761
762 static const struct interval rtl_b[] ALIGN4 = { ARRAY };
763# undef BIG_
764# undef PAIR
765# define BIG_(a,b)
766# define PAIR(a,b) (a << 2) | (b-a),
767 static const uint16_t rtl_p[] ALIGN2 = { ARRAY };
768# undef BIG_
769# undef PAIR
770# define BIG_(a,b) char big_##a[b < 0x4000 && b-a <= 3 ? -1 : 1];
771# define PAIR(a,b) char pair##a[b >= 0x4000 || b-a > 3 ? -1 : 1];
772 struct CHECK { ARRAY };
773# undef BIG_
774# undef PAIR
775# undef ARRAY
776
777 if (in_interval_table(wc, rtl_b, ARRAY_SIZE(rtl_b) - 1))
778 return 1;
779 if (in_uint16_table(wc, rtl_p, ARRAY_SIZE(rtl_p) - 1))
780 return 1;
781 return 0;
782}
783
784# if ENABLE_UNICODE_NEUTRAL_TABLE
785int FAST_FUNC unicode_bidi_is_neutral_wchar(wint_t wc)
786{
787
788
789
790
791
792
793# define BIG_(a,b) { a, b },
794# define PAIR(a,b)
795# define ARRAY \
796 BIG_(0x0009, 0x000D) \
797 BIG_(0x001C, 0x0040) \
798 BIG_(0x005B, 0x0060) \
799 PAIR(0x007B, 0x007E) \
800 PAIR(0x0085, 0x0085) \
801 BIG_(0x00A0, 0x00A9) \
802 PAIR(0x00AB, 0x00AC) \
803 BIG_(0x00AE, 0x00B4) \
804 PAIR(0x00B6, 0x00B9) \
805 BIG_(0x00BB, 0x00BF) \
806 PAIR(0x00D7, 0x00D7) \
807 PAIR(0x00F7, 0x00F7) \
808 PAIR(0x02B9, 0x02BA) \
809 BIG_(0x02C2, 0x02CF) \
810 BIG_(0x02D2, 0x02DF) \
811 BIG_(0x02E5, 0x02FF) \
812 PAIR(0x0374, 0x0375) \
813 PAIR(0x037E, 0x037E) \
814 PAIR(0x0384, 0x0385) \
815 PAIR(0x0387, 0x0387) \
816 PAIR(0x03F6, 0x03F6) \
817 PAIR(0x058A, 0x058A) \
818 PAIR(0x0600, 0x0603) \
819 PAIR(0x0606, 0x0607) \
820 PAIR(0x0609, 0x060A) \
821 PAIR(0x060C, 0x060C) \
822 PAIR(0x060E, 0x060F) \
823 BIG_(0x0660, 0x066C) \
824 PAIR(0x06DD, 0x06DD) \
825 PAIR(0x06E9, 0x06E9) \
826 BIG_(0x06F0, 0x06F9) \
827 PAIR(0x07F6, 0x07F9) \
828 PAIR(0x09F2, 0x09F3) \
829 PAIR(0x09FB, 0x09FB) \
830 PAIR(0x0AF1, 0x0AF1) \
831 BIG_(0x0BF3, 0x0BFA) \
832 BIG_(0x0C78, 0x0C7E) \
833 PAIR(0x0CF1, 0x0CF2) \
834 PAIR(0x0E3F, 0x0E3F) \
835 PAIR(0x0F3A, 0x0F3D) \
836 BIG_(0x1390, 0x1400) \
837 PAIR(0x1680, 0x1680) \
838 PAIR(0x169B, 0x169C) \
839 PAIR(0x17DB, 0x17DB) \
840 BIG_(0x17F0, 0x17F9) \
841 BIG_(0x1800, 0x180A) \
842 PAIR(0x180E, 0x180E) \
843 PAIR(0x1940, 0x1940) \
844 PAIR(0x1944, 0x1945) \
845 BIG_(0x19DE, 0x19FF) \
846 PAIR(0x1FBD, 0x1FBD) \
847 PAIR(0x1FBF, 0x1FC1) \
848 PAIR(0x1FCD, 0x1FCF) \
849 PAIR(0x1FDD, 0x1FDF) \
850 PAIR(0x1FED, 0x1FEF) \
851 PAIR(0x1FFD, 0x1FFE) \
852 BIG_(0x2000, 0x200A) \
853 BIG_(0x2010, 0x2029) \
854 BIG_(0x202F, 0x205F) \
855 PAIR(0x2070, 0x2070) \
856 BIG_(0x2074, 0x207E) \
857 BIG_(0x2080, 0x208E) \
858 BIG_(0x20A0, 0x20B8) \
859 PAIR(0x2100, 0x2101) \
860 PAIR(0x2103, 0x2106) \
861 PAIR(0x2108, 0x2109) \
862 PAIR(0x2114, 0x2114) \
863 PAIR(0x2116, 0x2118) \
864 BIG_(0x211E, 0x2123) \
865 PAIR(0x2125, 0x2125) \
866 PAIR(0x2127, 0x2127) \
867 PAIR(0x2129, 0x2129) \
868 PAIR(0x212E, 0x212E) \
869 PAIR(0x213A, 0x213B) \
870 BIG_(0x2140, 0x2144) \
871 PAIR(0x214A, 0x214D) \
872 BIG_(0x2150, 0x215F) \
873 PAIR(0x2189, 0x2189) \
874 BIG_(0x2190, 0x2335) \
875 BIG_(0x237B, 0x2394) \
876 BIG_(0x2396, 0x23E8) \
877 BIG_(0x2400, 0x2426) \
878 BIG_(0x2440, 0x244A) \
879 BIG_(0x2460, 0x249B) \
880 BIG_(0x24EA, 0x26AB) \
881 BIG_(0x26AD, 0x26CD) \
882 BIG_(0x26CF, 0x26E1) \
883 PAIR(0x26E3, 0x26E3) \
884 BIG_(0x26E8, 0x26FF) \
885 PAIR(0x2701, 0x2704) \
886 PAIR(0x2706, 0x2709) \
887 BIG_(0x270C, 0x2727) \
888 BIG_(0x2729, 0x274B) \
889 PAIR(0x274D, 0x274D) \
890 PAIR(0x274F, 0x2752) \
891 BIG_(0x2756, 0x275E) \
892 BIG_(0x2761, 0x2794) \
893 BIG_(0x2798, 0x27AF) \
894 BIG_(0x27B1, 0x27BE) \
895 BIG_(0x27C0, 0x27CA) \
896 PAIR(0x27CC, 0x27CC) \
897 BIG_(0x27D0, 0x27FF) \
898 BIG_(0x2900, 0x2B4C) \
899 BIG_(0x2B50, 0x2B59) \
900 BIG_(0x2CE5, 0x2CEA) \
901 BIG_(0x2CF9, 0x2CFF) \
902 BIG_(0x2E00, 0x2E99) \
903 BIG_(0x2E9B, 0x2EF3) \
904 BIG_(0x2F00, 0x2FD5) \
905 BIG_(0x2FF0, 0x2FFB) \
906 BIG_(0x3000, 0x3004) \
907 BIG_(0x3008, 0x3020) \
908 PAIR(0x3030, 0x3030) \
909 PAIR(0x3036, 0x3037) \
910 PAIR(0x303D, 0x303D) \
911 PAIR(0x303E, 0x303F) \
912 PAIR(0x309B, 0x309C) \
913 PAIR(0x30A0, 0x30A0) \
914 PAIR(0x30FB, 0x30FB) \
915 BIG_(0x31C0, 0x31E3) \
916 PAIR(0x321D, 0x321E) \
917 BIG_(0x3250, 0x325F) \
918 PAIR(0x327C, 0x327E) \
919 BIG_(0x32B1, 0x32BF) \
920 PAIR(0x32CC, 0x32CF) \
921 PAIR(0x3377, 0x337A) \
922 PAIR(0x33DE, 0x33DF) \
923 PAIR(0x33FF, 0x33FF) \
924 BIG_(0x4DC0, 0x4DFF) \
925 BIG_(0xA490, 0xA4C6) \
926 BIG_(0xA60D, 0xA60F) \
927 BIG_(0xA673, 0xA673) \
928 BIG_(0xA67E, 0xA67F) \
929 BIG_(0xA700, 0xA721) \
930 BIG_(0xA788, 0xA788) \
931 BIG_(0xA828, 0xA82B) \
932 BIG_(0xA838, 0xA839) \
933 BIG_(0xA874, 0xA877) \
934 BIG_(0xFB29, 0xFB29) \
935 BIG_(0xFD3E, 0xFD3F) \
936 BIG_(0xFDFD, 0xFDFD) \
937 BIG_(0xFE10, 0xFE19) \
938 BIG_(0xFE30, 0xFE52) \
939 BIG_(0xFE54, 0xFE66) \
940 BIG_(0xFE68, 0xFE6B) \
941 BIG_(0xFF01, 0xFF20) \
942 BIG_(0xFF3B, 0xFF40) \
943 BIG_(0xFF5B, 0xFF65) \
944 BIG_(0xFFE0, 0xFFE6) \
945 BIG_(0xFFE8, 0xFFEE) \
946 BIG_(0xFFF9, 0xFFFD)
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966 static const struct interval neutral_b[] ALIGN4 = { ARRAY };
967# undef BIG_
968# undef PAIR
969# define BIG_(a,b)
970# define PAIR(a,b) (a << 2) | (b-a),
971 static const uint16_t neutral_p[] ALIGN2 = { ARRAY };
972# undef BIG_
973# undef PAIR
974# define BIG_(a,b) char big_##a[b < 0x4000 && b-a <= 3 ? -1 : 1];
975# define PAIR(a,b) char pair##a[b >= 0x4000 || b-a > 3 ? -1 : 1];
976 struct CHECK { ARRAY };
977# undef BIG_
978# undef PAIR
979# undef ARRAY
980
981 if (in_interval_table(wc, neutral_b, ARRAY_SIZE(neutral_b) - 1))
982 return 1;
983 if (in_uint16_table(wc, neutral_p, ARRAY_SIZE(neutral_p) - 1))
984 return 1;
985 return 0;
986}
987# endif
988
989# endif
990
991#endif
992
993
994
995
996size_t FAST_FUNC unicode_strlen(const char *string)
997{
998 size_t width = mbstowcs(NULL, string, INT_MAX);
999 if (width == (size_t)-1L)
1000 return strlen(string);
1001 return width;
1002}
1003
1004size_t FAST_FUNC unicode_strwidth(const char *string)
1005{
1006 uni_stat_t uni_stat;
1007 printable_string2(&uni_stat, string);
1008 return uni_stat.unicode_width;
1009}
1010
1011static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags)
1012{
1013 char *dst;
1014 unsigned dst_len;
1015 unsigned uni_count;
1016 unsigned uni_width;
1017
1018 if (unicode_status != UNICODE_ON) {
1019 char *d;
1020 if (flags & UNI_FLAG_PAD) {
1021 d = dst = xmalloc(width + 1);
1022 while ((int)--width >= 0) {
1023 unsigned char c = *src;
1024 if (c == '\0') {
1025 do
1026 *d++ = ' ';
1027 while ((int)--width >= 0);
1028 break;
1029 }
1030 *d++ = (c >= ' ' && c < 0x7f) ? c : '?';
1031 src++;
1032 }
1033 *d = '\0';
1034 } else {
1035 d = dst = xstrndup(src, width);
1036 while (*d) {
1037 unsigned char c = *d;
1038 if (c < ' ' || c >= 0x7f)
1039 *d = '?';
1040 d++;
1041 }
1042 }
1043 if (stats) {
1044 stats->byte_count = (d - dst);
1045 stats->unicode_count = (d - dst);
1046 stats->unicode_width = (d - dst);
1047 }
1048 return dst;
1049 }
1050
1051 dst = NULL;
1052 uni_count = uni_width = 0;
1053 dst_len = 0;
1054 while (1) {
1055 int w;
1056 wchar_t wc;
1057
1058#if ENABLE_UNICODE_USING_LOCALE
1059 {
1060 mbstate_t mbst = { 0 };
1061 ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);
1062
1063
1064
1065
1066
1067
1068
1069
1070 if (rc == 0)
1071 break;
1072 if (rc < 0) {
1073 src++;
1074 goto subst;
1075 }
1076 if (!iswprint(wc))
1077 goto subst;
1078 }
1079#else
1080 src = mbstowc_internal(&wc, src);
1081
1082
1083
1084
1085 if (wc == ERROR_WCHAR)
1086 goto subst;
1087 if (wc == 0)
1088 break;
1089#endif
1090 if (CONFIG_LAST_SUPPORTED_WCHAR && wc > CONFIG_LAST_SUPPORTED_WCHAR)
1091 goto subst;
1092 w = wcwidth(wc);
1093 if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0)
1094 || (!ENABLE_UNICODE_COMBINING_WCHARS && w <= 0)
1095 || (!ENABLE_UNICODE_WIDE_WCHARS && w > 1)
1096 ) {
1097 subst:
1098 wc = CONFIG_SUBST_WCHAR;
1099 w = 1;
1100 }
1101 width -= w;
1102
1103
1104 if ((int)width < 0) {
1105
1106 width += w;
1107 break;
1108 }
1109
1110 uni_count++;
1111 uni_width += w;
1112 dst = xrealloc(dst, dst_len + MB_CUR_MAX);
1113#if ENABLE_UNICODE_USING_LOCALE
1114 {
1115 mbstate_t mbst = { 0 };
1116 dst_len += wcrtomb(&dst[dst_len], wc, &mbst);
1117 }
1118#else
1119 dst_len += wcrtomb_internal(&dst[dst_len], wc);
1120#endif
1121 }
1122
1123
1124 if (flags & UNI_FLAG_PAD) {
1125 dst = xrealloc(dst, dst_len + width + 1);
1126 uni_count += width;
1127 uni_width += width;
1128 while ((int)--width >= 0) {
1129 dst[dst_len++] = ' ';
1130 }
1131 }
1132 if (!dst)
1133 dst = xzalloc(1);
1134 dst[dst_len] = '\0';
1135 if (stats) {
1136 stats->byte_count = dst_len;
1137 stats->unicode_count = uni_count;
1138 stats->unicode_width = uni_width;
1139 }
1140
1141 return dst;
1142}
1143char* FAST_FUNC unicode_conv_to_printable(uni_stat_t *stats, const char *src)
1144{
1145 return unicode_conv_to_printable2(stats, src, INT_MAX, 0);
1146}
1147char* FAST_FUNC unicode_conv_to_printable_fixedwidth( const char *src, unsigned width)
1148{
1149 return unicode_conv_to_printable2( NULL, src, width, UNI_FLAG_PAD);
1150}
1151
1152#ifdef UNUSED
1153char* FAST_FUNC unicode_conv_to_printable_maxwidth(uni_stat_t *stats, const char *src, unsigned maxwidth)
1154{
1155 return unicode_conv_to_printable2(stats, src, maxwidth, 0);
1156}
1157
1158unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src)
1159{
1160 if (unicode_status != UNICODE_ON) {
1161 return width - strnlen(src, width);
1162 }
1163
1164 while (1) {
1165 int w;
1166 wchar_t wc;
1167
1168#if ENABLE_UNICODE_USING_LOCALE
1169 {
1170 mbstate_t mbst = { 0 };
1171 ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);
1172 if (rc <= 0)
1173 return width;
1174 }
1175#else
1176 src = mbstowc_internal(&wc, src);
1177 if (wc == ERROR_WCHAR || wc == 0)
1178 return width;
1179#endif
1180 w = wcwidth(wc);
1181 if (w < 0)
1182 return width;
1183 width -= w;
1184 if ((int)width <= 0)
1185 return 0;
1186 }
1187}
1188#endif
1189