1
2
3
4
5
6
7
8
9#include "libbb.h"
10#include "unicode.h"
11
12
/* Global Unicode mode flag. Code in this file compares it against
 * UNICODE_UNKNOWN, UNICODE_ON and UNICODE_OFF. The variable is only
 * defined here when the name is not already a preprocessor macro. */
#ifndef unicode_status
uint8_t unicode_status;
#endif
16
17
18
19
20
21
22#if ENABLE_UNICODE_USING_LOCALE
23
24
25
26void FAST_FUNC reinit_unicode(const char *LANG)
27{
28 static const char unicode_0x394[] = { 0xce, 0x94, 0 };
29 size_t width;
30
31
32 setlocale(LC_ALL, (LANG && LANG[0]) ? LANG : "C");
33
34
35
36 width = mbstowcs(NULL, unicode_0x394, INT_MAX);
37 unicode_status = (width == 1 ? UNICODE_ON : UNICODE_OFF);
38}
39
40void FAST_FUNC init_unicode(void)
41{
42 if (unicode_status == UNICODE_UNKNOWN)
43 reinit_unicode(getenv("LANG"));
44}
45
46#else
47
48
49
50# if ENABLE_FEATURE_CHECK_UNICODE_IN_ENV
51void FAST_FUNC reinit_unicode(const char *LANG)
52{
53 unicode_status = UNICODE_OFF;
54 if (!LANG || !(strstr(LANG, ".utf") || strstr(LANG, ".UTF")))
55 return;
56 unicode_status = UNICODE_ON;
57}
58
59void FAST_FUNC init_unicode(void)
60{
61 if (unicode_status == UNICODE_UNKNOWN)
62 reinit_unicode(getenv("LANG"));
63}
64# endif
65
/*
 * Encode wc as UTF-8 into s and return the number of bytes written (1..6).
 * No terminator is written; s must have room for up to 6 bytes.
 * The full 32-bit range is encoded, not just code points up to 10FFFF.
 */
static size_t wcrtomb_internal(char *s, wchar_t wc)
{
	uint32_t probe = wc;
	int len;
	int pos;

	/* Plain ASCII is stored as-is */
	if (probe < 0x80) {
		s[0] = probe;
		return 1;
	}

	/* Each extra byte carries 5 more bits than the previous length;
	 * the length is capped at 6 */
	for (len = 2; probe >= 0x800 && len < 6; len++)
		probe >>= 5;

	/* Continuation bytes, last one first: 10xxxxxx */
	for (pos = len - 1; pos > 0; pos--) {
		s[pos] = (wc & 0x3f) | 0x80;
		wc >>= 6;
	}

	/* Lead byte: 'len' high one-bits followed by the remaining payload */
	s[0] = wc | (uint8_t)(0x3f00 >> len);
	return len;
}
102size_t FAST_FUNC wcrtomb(char *s, wchar_t wc, mbstate_t *ps UNUSED_PARAM)
103{
104 if (unicode_status != UNICODE_ON) {
105 *s = wc;
106 return 1;
107 }
108
109 return wcrtomb_internal(s, wc);
110}
111size_t FAST_FUNC wcstombs(char *dest, const wchar_t *src, size_t n)
112{
113 size_t org_n = n;
114
115 if (unicode_status != UNICODE_ON) {
116 while (n) {
117 wchar_t c = *src++;
118 *dest++ = c;
119 if (c == 0)
120 break;
121 n--;
122 }
123 return org_n - n;
124 }
125
126 while (n >= MB_CUR_MAX) {
127 wchar_t wc = *src++;
128 size_t len = wcrtomb_internal(dest, wc);
129
130 if (wc == L'\0')
131 return org_n - n;
132 dest += len;
133 n -= len;
134 }
135 while (n) {
136 char tbuf[MB_CUR_MAX];
137 wchar_t wc = *src++;
138 size_t len = wcrtomb_internal(tbuf, wc);
139
140 if (len > n)
141 break;
142 memcpy(dest, tbuf, len);
143 if (wc == L'\0')
144 return org_n - n;
145 dest += len;
146 n -= len;
147 }
148 return org_n - n;
149}
150
# define ERROR_WCHAR (~(wchar_t)0)

/*
 * Decode one UTF-8 sequence starting at src.
 *
 * On success *res receives the code point and the returned pointer is
 * just past the sequence. On failure *res is set to ERROR_WCHAR:
 *  - bare continuation byte: returned pointer is past that byte;
 *  - missing/invalid continuation byte: returned pointer is at the
 *    offending byte (it is not consumed);
 *  - overlong encoding (value representable in fewer bytes): returned
 *    pointer is past the whole sequence.
 * Sequences of up to 6 bytes are accepted, mirroring the encoder.
 */
static const char *mbstowc_internal(wchar_t *res, const char *src)
{
	/* Smallest value that genuinely needs N bytes, indexed by N */
	static const unsigned min_for_len[7] = {
		0, 0, 0x80, 0x800, 0x10000, 0x200000, 0x4000000
	};
	int bytes;
	int seq_len;
	unsigned c = (unsigned char) *src++;

	if (c <= 0x7f) {
		*res = c;
		return src;
	}

	/* Count the leading one-bits of the lead byte (capped at 6);
	 * that is the total length of the sequence */
	bytes = 0;
	do {
		c <<= 1;
		bytes++;
	} while ((c & 0x80) && bytes < 6);
	if (bytes == 1) {
		/* A bare continuation byte, e.g. 0x80 */
		*res = ERROR_WCHAR;
		return src;
	}
	seq_len = bytes;
	/* Drop the length marker, keep only the payload bits of the lead byte */
	c = (uint8_t)(c) >> bytes;

	while (--bytes) {
		unsigned ch = (unsigned char) *src;
		if ((ch & 0xc0) != 0x80) {
			/* Missing continuation byte, e.g. 0xe0 0x80 followed by ASCII */
			*res = ERROR_WCHAR;
			return src;
		}
		c = (c << 6) + (ch & 0x3f);
		src++;
	}

	/* Reject overlong forms: e.g. c0 80 (NUL) or e0 84 80 (0x100, whose
	 * correct encoding is c4 80). This also covers every value <= 0x7f. */
	if (c < min_for_len[seq_len]) {
		*res = ERROR_WCHAR;
		return src;
	}

	*res = c;
	return src;
}
204size_t FAST_FUNC mbstowcs(wchar_t *dest, const char *src, size_t n)
205{
206 size_t org_n = n;
207
208 if (unicode_status != UNICODE_ON) {
209 while (n) {
210 unsigned char c = *src++;
211
212 if (dest)
213 *dest++ = c;
214 if (c == 0)
215 break;
216 n--;
217 }
218 return org_n - n;
219 }
220
221 while (n) {
222 wchar_t wc;
223 src = mbstowc_internal(&wc, src);
224 if (wc == ERROR_WCHAR)
225 return (size_t) -1L;
226 if (dest)
227 *dest++ = wc;
228 if (wc == 0)
229 break;
230 n--;
231 }
232
233 return org_n - n;
234}
235
236int FAST_FUNC iswspace(wint_t wc)
237{
238 return (unsigned)wc <= 0x7f && isspace(wc);
239}
240
241int FAST_FUNC iswalnum(wint_t wc)
242{
243 return (unsigned)wc <= 0x7f && isalnum(wc);
244}
245
246int FAST_FUNC iswpunct(wint_t wc)
247{
248 return (unsigned)wc <= 0x7f && ispunct(wc);
249}
250
251
252# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x300
/* Inclusive range of 16-bit code points */
struct interval {
	uint16_t first;
	uint16_t last;
};

/*
 * Binary search for ucs in a table of intervals.
 * 'max' is the index of the LAST element (element count minus one).
 * The search relies on the table being ordered by 'first'.
 * Returns 1 if some interval contains ucs, 0 otherwise.
 */
static int in_interval_table(unsigned ucs, const struct interval *table, unsigned max)
{
	unsigned lo = 0;
	unsigned hi = max;

	/* Quick rejection of values outside the table's overall span.
	 * This also guarantees hi never wraps below zero in the loop. */
	if (ucs < table[0].first)
		return 0;
	if (ucs > table[hi].last)
		return 0;

	while (lo <= hi) {
		unsigned probe = (lo + hi) / 2;
		const struct interval *iv = &table[probe];

		if (ucs > iv->last) {
			lo = probe + 1;
			continue;
		}
		if (ucs < iv->first) {
			hi = probe - 1;
			continue;
		}
		return 1;
	}
	return 0;
}
279
/*
 * Binary search for ucs in a table of packed short ranges.
 * Each uint16_t entry holds the range start in its upper 14 bits and
 * (range length - 1) in its low 2 bits, i.e. it covers
 * [entry >> 2, (entry >> 2) + (entry & 3)].
 * 'max' is the index of the LAST element (element count minus one).
 * The search relies on the table being ordered by range start.
 * Returns 1 if some range contains ucs, 0 otherwise.
 */
static int in_uint16_table(unsigned ucs, const uint16_t *table, unsigned max)
{
	unsigned min;
	unsigned mid;
	unsigned first, last;

	/* Quick rejection: below the first range or above the LAST range.
	 * (The upper bound must come from table[max]; using table[0] would
	 * reject everything outside the very first entry.)
	 * The lower-bound check also guarantees 'max' never wraps below
	 * zero in the loop. */
	first = table[0] >> 2;
	if (ucs < first)
		return 0;
	last = (table[max] >> 2) + (table[max] & 3);
	if (ucs > last)
		return 0;

	min = 0;
	while (max >= min) {
		mid = (min + max) / 2;
		first = table[mid] >> 2;
		last = first + (table[mid] & 3);
		if (ucs > last)
			min = mid + 1;
		else if (ucs < first)
			max = mid - 1;
		else
			return 1;
	}
	return 0;
}
305# endif
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
/*
 * Return the display width of code point ucs in terminal columns:
 *   0  for ucs == 0 and for code points found in the zero-width tables
 *  -1  for control characters (0x01-0x1f, 0x7f, 0x80-0x9f), for code
 *      points above CONFIG_LAST_SUPPORTED_WCHAR, and for the ranges
 *      explicitly rejected below
 *   2  for the wide ranges listed in the final return expression
 *   1  otherwise
 * Which of these checks are compiled in depends on
 * CONFIG_LAST_SUPPORTED_WCHAR.
 */
int FAST_FUNC wcwidth(unsigned ucs)
{
# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x300
 /* Zero-width ranges, written once as an X-macro list and expanded
  * three times below. First expansion: only BIG_ entries are emitted,
  * as { first, last } pairs for the 'combining' interval table. */
# define BIG_(a,b) { a, b },
# define PAIR(a,b)
# define ARRAY \
 BIG_(0x0300, 0x036F) \
 PAIR(0x0483, 0x0486) \
 PAIR(0x0488, 0x0489) \
 BIG_(0x0591, 0x05BD) \
 PAIR(0x05BF, 0x05BF) \
 PAIR(0x05C1, 0x05C2) \
 PAIR(0x05C4, 0x05C5) \
 PAIR(0x05C7, 0x05C7) \
 PAIR(0x0600, 0x0603) \
 BIG_(0x0610, 0x0615) \
 BIG_(0x064B, 0x065E) \
 PAIR(0x0670, 0x0670) \
 BIG_(0x06D6, 0x06E4) \
 PAIR(0x06E7, 0x06E8) \
 PAIR(0x06EA, 0x06ED) \
 PAIR(0x070F, 0x070F) \
 PAIR(0x0711, 0x0711) \
 BIG_(0x0730, 0x074A) \
 BIG_(0x07A6, 0x07B0) \
 BIG_(0x07EB, 0x07F3) \
 PAIR(0x0901, 0x0902) \
 PAIR(0x093C, 0x093C) \
 BIG_(0x0941, 0x0948) \
 PAIR(0x094D, 0x094D) \
 PAIR(0x0951, 0x0954) \
 PAIR(0x0962, 0x0963) \
 PAIR(0x0981, 0x0981) \
 PAIR(0x09BC, 0x09BC) \
 PAIR(0x09C1, 0x09C4) \
 PAIR(0x09CD, 0x09CD) \
 PAIR(0x09E2, 0x09E3) \
 PAIR(0x0A01, 0x0A02) \
 PAIR(0x0A3C, 0x0A3C) \
 PAIR(0x0A41, 0x0A42) \
 PAIR(0x0A47, 0x0A48) \
 PAIR(0x0A4B, 0x0A4D) \
 PAIR(0x0A70, 0x0A71) \
 PAIR(0x0A81, 0x0A82) \
 PAIR(0x0ABC, 0x0ABC) \
 BIG_(0x0AC1, 0x0AC5) \
 PAIR(0x0AC7, 0x0AC8) \
 PAIR(0x0ACD, 0x0ACD) \
 PAIR(0x0AE2, 0x0AE3) \
 PAIR(0x0B01, 0x0B01) \
 PAIR(0x0B3C, 0x0B3C) \
 PAIR(0x0B3F, 0x0B3F) \
 PAIR(0x0B41, 0x0B43) \
 PAIR(0x0B4D, 0x0B4D) \
 PAIR(0x0B56, 0x0B56) \
 PAIR(0x0B82, 0x0B82) \
 PAIR(0x0BC0, 0x0BC0) \
 PAIR(0x0BCD, 0x0BCD) \
 PAIR(0x0C3E, 0x0C40) \
 PAIR(0x0C46, 0x0C48) \
 PAIR(0x0C4A, 0x0C4D) \
 PAIR(0x0C55, 0x0C56) \
 PAIR(0x0CBC, 0x0CBC) \
 PAIR(0x0CBF, 0x0CBF) \
 PAIR(0x0CC6, 0x0CC6) \
 PAIR(0x0CCC, 0x0CCD) \
 PAIR(0x0CE2, 0x0CE3) \
 PAIR(0x0D41, 0x0D43) \
 PAIR(0x0D4D, 0x0D4D) \
 PAIR(0x0DCA, 0x0DCA) \
 PAIR(0x0DD2, 0x0DD4) \
 PAIR(0x0DD6, 0x0DD6) \
 PAIR(0x0E31, 0x0E31) \
 BIG_(0x0E34, 0x0E3A) \
 BIG_(0x0E47, 0x0E4E) \
 PAIR(0x0EB1, 0x0EB1) \
 BIG_(0x0EB4, 0x0EB9) \
 PAIR(0x0EBB, 0x0EBC) \
 BIG_(0x0EC8, 0x0ECD) \
 PAIR(0x0F18, 0x0F19) \
 PAIR(0x0F35, 0x0F35) \
 PAIR(0x0F37, 0x0F37) \
 PAIR(0x0F39, 0x0F39) \
 BIG_(0x0F71, 0x0F7E) \
 BIG_(0x0F80, 0x0F84) \
 PAIR(0x0F86, 0x0F87) \
 PAIR(0x0FC6, 0x0FC6) \
 BIG_(0x0F90, 0x0F97) \
 BIG_(0x0F99, 0x0FBC) \
 PAIR(0x102D, 0x1030) \
 PAIR(0x1032, 0x1032) \
 PAIR(0x1036, 0x1037) \
 PAIR(0x1039, 0x1039) \
 PAIR(0x1058, 0x1059) \
 BIG_(0x1160, 0x11FF) \
 PAIR(0x135F, 0x135F) \
 PAIR(0x1712, 0x1714) \
 PAIR(0x1732, 0x1734) \
 PAIR(0x1752, 0x1753) \
 PAIR(0x1772, 0x1773) \
 PAIR(0x17B4, 0x17B5) \
 BIG_(0x17B7, 0x17BD) \
 PAIR(0x17C6, 0x17C6) \
 BIG_(0x17C9, 0x17D3) \
 PAIR(0x17DD, 0x17DD) \
 PAIR(0x180B, 0x180D) \
 PAIR(0x18A9, 0x18A9) \
 PAIR(0x1920, 0x1922) \
 PAIR(0x1927, 0x1928) \
 PAIR(0x1932, 0x1932) \
 PAIR(0x1939, 0x193B) \
 PAIR(0x1A17, 0x1A18) \
 PAIR(0x1B00, 0x1B03) \
 PAIR(0x1B34, 0x1B34) \
 BIG_(0x1B36, 0x1B3A) \
 PAIR(0x1B3C, 0x1B3C) \
 PAIR(0x1B42, 0x1B42) \
 BIG_(0x1B6B, 0x1B73) \
 BIG_(0x1DC0, 0x1DCA) \
 PAIR(0x1DFE, 0x1DFF) \
 BIG_(0x200B, 0x200F) \
 BIG_(0x202A, 0x202E) \
 PAIR(0x2060, 0x2063) \
 BIG_(0x206A, 0x206F) \
 BIG_(0x20D0, 0x20EF) \
 BIG_(0x302A, 0x302F) \
 PAIR(0x3099, 0x309A) \
 \
 BIG_(0xA806, 0xA806) \
 BIG_(0xA80B, 0xA80B) \
 BIG_(0xA825, 0xA826) \
 BIG_(0xFB1E, 0xFB1E) \
 BIG_(0xFE00, 0xFE0F) \
 BIG_(0xFE20, 0xFE23) \
 BIG_(0xFEFF, 0xFEFF) \
 BIG_(0xFFF9, 0xFFFB)
 static const struct interval combining[] = { ARRAY };
# undef BIG_
# undef PAIR
 /* Second expansion: only PAIR entries are emitted, each packed into
  * one uint16_t as (start << 2) | (length - 1). */
# define BIG_(a,b)
# define PAIR(a,b) (a << 2) | (b-a),
 static const uint16_t combining1[] = { ARRAY };
# undef BIG_
# undef PAIR
 /* Third expansion: compile-time sanity check. An array size of -1 is
  * a compile error; it is produced when a BIG_ entry would have fit
  * the packed form (b < 0x4000 and b-a <= 3), or when a PAIR entry
  * does not fit it (b >= 0x4000 or b-a > 3). */
# define BIG_(a,b) char big_##a[b < 0x4000 && b-a <= 3 ? -1 : 1];
# define PAIR(a,b) char pair##a[b >= 0x4000 || b-a > 3 ? -1 : 1];
 struct CHECK { ARRAY };
# undef BIG_
# undef PAIR
# undef ARRAY
# endif

 if (ucs == 0)
 return 0;

 /* Control characters: 0x01-0x1f, 0x80-0x9f (bit 7 masked off) and 0x7f */
 if ((ucs & ~0x80) < 0x20 || ucs == 0x7f)
 return -1;

 /* Beyond the configured limit */
 if (ucs > CONFIG_LAST_SUPPORTED_WCHAR)
 return -1;

 /* Everything printable below 0x300 is one column wide */
 if (CONFIG_LAST_SUPPORTED_WCHAR < 0x300 || ucs < 0x300)
 return 1;

# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x300

 /* Zero-width code points from the two tables built above */
 if (in_interval_table(ucs, combining, ARRAY_SIZE(combining) - 1))
 return 0;
 if (in_uint16_table(ucs, combining1, ARRAY_SIZE(combining1) - 1))
 return 0;

 /* No wide characters exist below 0x1100 */
 if (CONFIG_LAST_SUPPORTED_WCHAR < 0x1100 || ucs < 0x1100)
 return 1;

# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x1100

 /* Rejected outright: 0xd800-0xf8ff (surrogates and BMP private use)
  * and 0xfdd0-0xfdef */
 if ((CONFIG_LAST_SUPPORTED_WCHAR >= 0xd800 && ucs >= 0xd800 && ucs <= 0xf8ff)
 || (CONFIG_LAST_SUPPORTED_WCHAR >= 0xfdd0 && ucs >= 0xfdd0 && ucs <= 0xfdef)
 ) {
 return -1;
 }
 /* Code points whose low 16 bits are 0xfffe or 0xffff, in any plane */
 if (CONFIG_LAST_SUPPORTED_WCHAR >= 0xfffe && ((ucs & 0xfffe) == 0xfffe)) {
 return -1;
 }

# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x10000
 if (ucs >= 0x10000) {
 /* Zero-width ranges in 0x10000-0x1ffff, stored with bit 16 cleared
  * so they fit the 16-bit interval fields; the lookup key below is
  * transformed the same way with "ucs ^ 0x10000" */
 static const struct interval combining0x10000[] = {
 { 0x0A01, 0x0A03 }, { 0x0A05, 0x0A06 }, { 0x0A0C, 0x0A0F },
 { 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 },
 { 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD },
 { 0xD242, 0xD244 }
 };

 if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1))
 return 0;

 /* Further zero-width code points at 0xE0001, 0xE0020-0xE007F and
  * 0xE0100-0xE01EF */
 if (CONFIG_LAST_SUPPORTED_WCHAR >= 0xE0001
 && ( ucs == 0xE0001
 || (ucs >= 0xE0020 && ucs <= 0xE007F)
 || (ucs >= 0xE0100 && ucs <= 0xE01EF)
 )
 ) {
 return 0;
 }
 }
# endif

 /* Remaining code points are 1 column wide, plus 1 more (the value of
  * the boolean expression) when they fall in one of the wide ranges.
  * ucs >= 0x1100 is already known here, so the first test covers
  * 0x1100-0x115f. The last test, (ucs >> 17) == 1, covers
  * 0x20000-0x3ffff. */
 return 1 +
 ( ( ucs <= 0x115f)
 || ucs == 0x2329
 || ucs == 0x232a
 || (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f)
# if CONFIG_LAST_SUPPORTED_WCHAR >= 0xac00
 || (ucs >= 0xac00 && ucs <= 0xd7a3)
 || (ucs >= 0xf900 && ucs <= 0xfaff)
 || (ucs >= 0xfe10 && ucs <= 0xfe19)
 || (ucs >= 0xfe30 && ucs <= 0xfe6f)
 || (ucs >= 0xff00 && ucs <= 0xff60)
 || (ucs >= 0xffe0 && ucs <= 0xffe6)
 || ((ucs >> 17) == (2 >> 1))
# endif
 );
# endif
# endif
}
669
670
671# if ENABLE_UNICODE_BIDI_SUPPORT
/*
 * Return 1 if wc lies in one of the ranges listed below (the set this
 * function reports as right-to-left), 0 otherwise.
 */
int FAST_FUNC unicode_bidi_isrtl(wint_t wc)
{
 /* The range list is an X-macro expanded three times. First expansion:
  * only BIG_ entries are emitted, as { first, last } pairs for the
  * 'rtl_b' interval table. */
# define BIG_(a,b) { a, b },
# define PAIR(a,b)
# define ARRAY \
 PAIR(0x0590, 0x0590) \
 PAIR(0x05BE, 0x05BE) \
 PAIR(0x05C0, 0x05C0) \
 PAIR(0x05C3, 0x05C3) \
 PAIR(0x05C6, 0x05C6) \
 BIG_(0x05C8, 0x05FF) \
 PAIR(0x0604, 0x0605) \
 PAIR(0x0608, 0x0608) \
 PAIR(0x060B, 0x060B) \
 PAIR(0x060D, 0x060D) \
 BIG_(0x061B, 0x064A) \
 PAIR(0x065F, 0x065F) \
 PAIR(0x066D, 0x066F) \
 BIG_(0x0671, 0x06D5) \
 PAIR(0x06E5, 0x06E6) \
 PAIR(0x06EE, 0x06EF) \
 BIG_(0x06FA, 0x070E) \
 PAIR(0x0710, 0x0710) \
 BIG_(0x0712, 0x072F) \
 BIG_(0x074B, 0x07A5) \
 BIG_(0x07B1, 0x07EA) \
 PAIR(0x07F4, 0x07F5) \
 BIG_(0x07FA, 0x0815) \
 PAIR(0x081A, 0x081A) \
 PAIR(0x0824, 0x0824) \
 PAIR(0x0828, 0x0828) \
 BIG_(0x082E, 0x08FF) \
 PAIR(0x200F, 0x200F) \
 PAIR(0x202B, 0x202B) \
 PAIR(0x202E, 0x202E) \
 BIG_(0xFB1D, 0xFB1D) \
 BIG_(0xFB1F, 0xFB28) \
 BIG_(0xFB2A, 0xFD3D) \
 BIG_(0xFD40, 0xFDCF) \
 BIG_(0xFDC8, 0xFDCF) \
 BIG_(0xFDF0, 0xFDFC) \
 BIG_(0xFDFE, 0xFDFF) \
 BIG_(0xFE70, 0xFEFE)

 /* NOTE(review): BIG_(0xFDC8, 0xFDCF) is fully contained in the
  * preceding BIG_(0xFD40, 0xFDCF) entry, so it looks redundant. */
 static const struct interval rtl_b[] = { ARRAY };
# undef BIG_
# undef PAIR
 /* Second expansion: only PAIR entries are emitted, each packed into
  * one uint16_t as (start << 2) | (length - 1). */
# define BIG_(a,b)
# define PAIR(a,b) (a << 2) | (b-a),
 static const uint16_t rtl_p[] = { ARRAY };
# undef BIG_
# undef PAIR
 /* Third expansion: compile-time sanity check. An array size of -1 is
  * a compile error; it is produced when a BIG_ entry would have fit
  * the packed form (b < 0x4000 and b-a <= 3), or when a PAIR entry
  * does not fit it (b >= 0x4000 or b-a > 3). */
# define BIG_(a,b) char big_##a[b < 0x4000 && b-a <= 3 ? -1 : 1];
# define PAIR(a,b) char pair##a[b >= 0x4000 || b-a > 3 ? -1 : 1];
 struct CHECK { ARRAY };
# undef BIG_
# undef PAIR
# undef ARRAY

 /* Look in the interval table first, then in the packed table */
 if (in_interval_table(wc, rtl_b, ARRAY_SIZE(rtl_b) - 1))
 return 1;
 if (in_uint16_table(wc, rtl_p, ARRAY_SIZE(rtl_p) - 1))
 return 1;
 return 0;
}
753
754# if ENABLE_UNICODE_NEUTRAL_TABLE
/*
 * Return 1 if wc lies in one of the ranges listed below (the set this
 * function reports as direction-neutral), 0 otherwise.
 */
int FAST_FUNC unicode_bidi_is_neutral_wchar(wint_t wc)
{
 /* The range list is an X-macro expanded three times. First expansion:
  * only BIG_ entries are emitted, as { first, last } pairs for the
  * 'neutral_b' interval table. */
# define BIG_(a,b) { a, b },
# define PAIR(a,b)
# define ARRAY \
 BIG_(0x0009, 0x000D) \
 BIG_(0x001C, 0x0040) \
 BIG_(0x005B, 0x0060) \
 PAIR(0x007B, 0x007E) \
 PAIR(0x0085, 0x0085) \
 BIG_(0x00A0, 0x00A9) \
 PAIR(0x00AB, 0x00AC) \
 BIG_(0x00AE, 0x00B4) \
 PAIR(0x00B6, 0x00B9) \
 BIG_(0x00BB, 0x00BF) \
 PAIR(0x00D7, 0x00D7) \
 PAIR(0x00F7, 0x00F7) \
 PAIR(0x02B9, 0x02BA) \
 BIG_(0x02C2, 0x02CF) \
 BIG_(0x02D2, 0x02DF) \
 BIG_(0x02E5, 0x02FF) \
 PAIR(0x0374, 0x0375) \
 PAIR(0x037E, 0x037E) \
 PAIR(0x0384, 0x0385) \
 PAIR(0x0387, 0x0387) \
 PAIR(0x03F6, 0x03F6) \
 PAIR(0x058A, 0x058A) \
 PAIR(0x0600, 0x0603) \
 PAIR(0x0606, 0x0607) \
 PAIR(0x0609, 0x060A) \
 PAIR(0x060C, 0x060C) \
 PAIR(0x060E, 0x060F) \
 BIG_(0x0660, 0x066C) \
 PAIR(0x06DD, 0x06DD) \
 PAIR(0x06E9, 0x06E9) \
 BIG_(0x06F0, 0x06F9) \
 PAIR(0x07F6, 0x07F9) \
 PAIR(0x09F2, 0x09F3) \
 PAIR(0x09FB, 0x09FB) \
 PAIR(0x0AF1, 0x0AF1) \
 BIG_(0x0BF3, 0x0BFA) \
 BIG_(0x0C78, 0x0C7E) \
 PAIR(0x0CF1, 0x0CF2) \
 PAIR(0x0E3F, 0x0E3F) \
 PAIR(0x0F3A, 0x0F3D) \
 BIG_(0x1390, 0x1400) \
 PAIR(0x1680, 0x1680) \
 PAIR(0x169B, 0x169C) \
 PAIR(0x17DB, 0x17DB) \
 BIG_(0x17F0, 0x17F9) \
 BIG_(0x1800, 0x180A) \
 PAIR(0x180E, 0x180E) \
 PAIR(0x1940, 0x1940) \
 PAIR(0x1944, 0x1945) \
 BIG_(0x19DE, 0x19FF) \
 PAIR(0x1FBD, 0x1FBD) \
 PAIR(0x1FBF, 0x1FC1) \
 PAIR(0x1FCD, 0x1FCF) \
 PAIR(0x1FDD, 0x1FDF) \
 PAIR(0x1FED, 0x1FEF) \
 PAIR(0x1FFD, 0x1FFE) \
 BIG_(0x2000, 0x200A) \
 BIG_(0x2010, 0x2029) \
 BIG_(0x202F, 0x205F) \
 PAIR(0x2070, 0x2070) \
 BIG_(0x2074, 0x207E) \
 BIG_(0x2080, 0x208E) \
 BIG_(0x20A0, 0x20B8) \
 PAIR(0x2100, 0x2101) \
 PAIR(0x2103, 0x2106) \
 PAIR(0x2108, 0x2109) \
 PAIR(0x2114, 0x2114) \
 PAIR(0x2116, 0x2118) \
 BIG_(0x211E, 0x2123) \
 PAIR(0x2125, 0x2125) \
 PAIR(0x2127, 0x2127) \
 PAIR(0x2129, 0x2129) \
 PAIR(0x212E, 0x212E) \
 PAIR(0x213A, 0x213B) \
 BIG_(0x2140, 0x2144) \
 PAIR(0x214A, 0x214D) \
 BIG_(0x2150, 0x215F) \
 PAIR(0x2189, 0x2189) \
 BIG_(0x2190, 0x2335) \
 BIG_(0x237B, 0x2394) \
 BIG_(0x2396, 0x23E8) \
 BIG_(0x2400, 0x2426) \
 BIG_(0x2440, 0x244A) \
 BIG_(0x2460, 0x249B) \
 BIG_(0x24EA, 0x26AB) \
 BIG_(0x26AD, 0x26CD) \
 BIG_(0x26CF, 0x26E1) \
 PAIR(0x26E3, 0x26E3) \
 BIG_(0x26E8, 0x26FF) \
 PAIR(0x2701, 0x2704) \
 PAIR(0x2706, 0x2709) \
 BIG_(0x270C, 0x2727) \
 BIG_(0x2729, 0x274B) \
 PAIR(0x274D, 0x274D) \
 PAIR(0x274F, 0x2752) \
 BIG_(0x2756, 0x275E) \
 BIG_(0x2761, 0x2794) \
 BIG_(0x2798, 0x27AF) \
 BIG_(0x27B1, 0x27BE) \
 BIG_(0x27C0, 0x27CA) \
 PAIR(0x27CC, 0x27CC) \
 BIG_(0x27D0, 0x27FF) \
 BIG_(0x2900, 0x2B4C) \
 BIG_(0x2B50, 0x2B59) \
 BIG_(0x2CE5, 0x2CEA) \
 BIG_(0x2CF9, 0x2CFF) \
 BIG_(0x2E00, 0x2E99) \
 BIG_(0x2E9B, 0x2EF3) \
 BIG_(0x2F00, 0x2FD5) \
 BIG_(0x2FF0, 0x2FFB) \
 BIG_(0x3000, 0x3004) \
 BIG_(0x3008, 0x3020) \
 PAIR(0x3030, 0x3030) \
 PAIR(0x3036, 0x3037) \
 PAIR(0x303D, 0x303D) \
 PAIR(0x303E, 0x303F) \
 PAIR(0x309B, 0x309C) \
 PAIR(0x30A0, 0x30A0) \
 PAIR(0x30FB, 0x30FB) \
 BIG_(0x31C0, 0x31E3) \
 PAIR(0x321D, 0x321E) \
 BIG_(0x3250, 0x325F) \
 PAIR(0x327C, 0x327E) \
 BIG_(0x32B1, 0x32BF) \
 PAIR(0x32CC, 0x32CF) \
 PAIR(0x3377, 0x337A) \
 PAIR(0x33DE, 0x33DF) \
 PAIR(0x33FF, 0x33FF) \
 BIG_(0x4DC0, 0x4DFF) \
 BIG_(0xA490, 0xA4C6) \
 BIG_(0xA60D, 0xA60F) \
 BIG_(0xA673, 0xA673) \
 BIG_(0xA67E, 0xA67F) \
 BIG_(0xA700, 0xA721) \
 BIG_(0xA788, 0xA788) \
 BIG_(0xA828, 0xA82B) \
 BIG_(0xA838, 0xA839) \
 BIG_(0xA874, 0xA877) \
 BIG_(0xFB29, 0xFB29) \
 BIG_(0xFD3E, 0xFD3F) \
 BIG_(0xFDFD, 0xFDFD) \
 BIG_(0xFE10, 0xFE19) \
 BIG_(0xFE30, 0xFE52) \
 BIG_(0xFE54, 0xFE66) \
 BIG_(0xFE68, 0xFE6B) \
 BIG_(0xFF01, 0xFF20) \
 BIG_(0xFF3B, 0xFF40) \
 BIG_(0xFF5B, 0xFF65) \
 BIG_(0xFFE0, 0xFFE6) \
 BIG_(0xFFE8, 0xFFEE) \
 BIG_(0xFFF9, 0xFFFD)

 static const struct interval neutral_b[] = { ARRAY };
# undef BIG_
# undef PAIR
 /* Second expansion: only PAIR entries are emitted, each packed into
  * one uint16_t as (start << 2) | (length - 1). */
# define BIG_(a,b)
# define PAIR(a,b) (a << 2) | (b-a),
 static const uint16_t neutral_p[] = { ARRAY };
# undef BIG_
# undef PAIR
 /* Third expansion: compile-time sanity check. An array size of -1 is
  * a compile error; it is produced when a BIG_ entry would have fit
  * the packed form (b < 0x4000 and b-a <= 3), or when a PAIR entry
  * does not fit it (b >= 0x4000 or b-a > 3). */
# define BIG_(a,b) char big_##a[b < 0x4000 && b-a <= 3 ? -1 : 1];
# define PAIR(a,b) char pair##a[b >= 0x4000 || b-a > 3 ? -1 : 1];
 struct CHECK { ARRAY };
# undef BIG_
# undef PAIR
# undef ARRAY

 /* Look in the interval table first, then in the packed table */
 if (in_interval_table(wc, neutral_b, ARRAY_SIZE(neutral_b) - 1))
 return 1;
 if (in_uint16_table(wc, neutral_p, ARRAY_SIZE(neutral_p) - 1))
 return 1;
 return 0;
}
957# endif
958
959# endif
960
961#endif
962
963
964
965
966#if 0
967size_t FAST_FUNC unicode_strlen(const char *string)
968{
969 size_t width = mbstowcs(NULL, string, INT_MAX);
970 if (width == (size_t)-1L)
971 return strlen(string);
972 return width;
973}
974#endif
975
976size_t FAST_FUNC unicode_strwidth(const char *string)
977{
978 uni_stat_t uni_stat;
979 printable_string(&uni_stat, string);
980 return uni_stat.unicode_width;
981}
982
983static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags)
984{
985 char *dst;
986 unsigned dst_len;
987 unsigned uni_count;
988 unsigned uni_width;
989
990 if (unicode_status != UNICODE_ON) {
991 char *d;
992 if (flags & UNI_FLAG_PAD) {
993 d = dst = xmalloc(width + 1);
994 while ((int)--width >= 0) {
995 unsigned char c = *src;
996 if (c == '\0') {
997 do
998 *d++ = ' ';
999 while ((int)--width >= 0);
1000 break;
1001 }
1002 *d++ = (c >= ' ' && c < 0x7f) ? c : '?';
1003 src++;
1004 }
1005 *d = '\0';
1006 } else {
1007 d = dst = xstrndup(src, width);
1008 while (*d) {
1009 unsigned char c = *d;
1010 if (c < ' ' || c >= 0x7f)
1011 *d = '?';
1012 d++;
1013 }
1014 }
1015 if (stats) {
1016 stats->byte_count = (d - dst);
1017 stats->unicode_count = (d - dst);
1018 stats->unicode_width = (d - dst);
1019 }
1020 return dst;
1021 }
1022
1023 dst = NULL;
1024 uni_count = uni_width = 0;
1025 dst_len = 0;
1026 while (1) {
1027 int w;
1028 wchar_t wc;
1029
1030#if ENABLE_UNICODE_USING_LOCALE
1031 {
1032 mbstate_t mbst = { 0 };
1033 ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);
1034
1035
1036
1037
1038
1039
1040
1041
1042 if (rc == 0)
1043 break;
1044 if (rc < 0) {
1045 src++;
1046 goto subst;
1047 }
1048 if (!iswprint(wc))
1049 goto subst;
1050 }
1051#else
1052 src = mbstowc_internal(&wc, src);
1053
1054
1055
1056
1057 if (wc == ERROR_WCHAR)
1058 goto subst;
1059 if (wc == 0)
1060 break;
1061#endif
1062 if (CONFIG_LAST_SUPPORTED_WCHAR && wc > CONFIG_LAST_SUPPORTED_WCHAR)
1063 goto subst;
1064 w = wcwidth(wc);
1065 if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0)
1066 || (!ENABLE_UNICODE_COMBINING_WCHARS && w <= 0)
1067 || (!ENABLE_UNICODE_WIDE_WCHARS && w > 1)
1068 ) {
1069 subst:
1070 wc = CONFIG_SUBST_WCHAR;
1071 w = 1;
1072 }
1073 width -= w;
1074
1075
1076 if ((int)width < 0) {
1077
1078 width += w;
1079 break;
1080 }
1081
1082 uni_count++;
1083 uni_width += w;
1084 dst = xrealloc(dst, dst_len + MB_CUR_MAX);
1085#if ENABLE_UNICODE_USING_LOCALE
1086 {
1087 mbstate_t mbst = { 0 };
1088 dst_len += wcrtomb(&dst[dst_len], wc, &mbst);
1089 }
1090#else
1091 dst_len += wcrtomb_internal(&dst[dst_len], wc);
1092#endif
1093 }
1094
1095
1096 if (flags & UNI_FLAG_PAD) {
1097 dst = xrealloc(dst, dst_len + width + 1);
1098 uni_count += width;
1099 uni_width += width;
1100 while ((int)--width >= 0) {
1101 dst[dst_len++] = ' ';
1102 }
1103 }
1104 dst[dst_len] = '\0';
1105 if (stats) {
1106 stats->byte_count = dst_len;
1107 stats->unicode_count = uni_count;
1108 stats->unicode_width = uni_width;
1109 }
1110
1111 return dst;
1112}
1113char* FAST_FUNC unicode_conv_to_printable(uni_stat_t *stats, const char *src)
1114{
1115 return unicode_conv_to_printable2(stats, src, INT_MAX, 0);
1116}
1117char* FAST_FUNC unicode_conv_to_printable_fixedwidth( const char *src, unsigned width)
1118{
1119 return unicode_conv_to_printable2( NULL, src, width, UNI_FLAG_PAD);
1120}
1121
1122#ifdef UNUSED
1123char* FAST_FUNC unicode_conv_to_printable_maxwidth(uni_stat_t *stats, const char *src, unsigned maxwidth)
1124{
1125 return unicode_conv_to_printable2(stats, src, maxwidth, 0);
1126}
1127
1128unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src)
1129{
1130 if (unicode_status != UNICODE_ON) {
1131 return width - strnlen(src, width);
1132 }
1133
1134 while (1) {
1135 int w;
1136 wchar_t wc;
1137
1138#if ENABLE_UNICODE_USING_LOCALE
1139 {
1140 mbstate_t mbst = { 0 };
1141 ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);
1142 if (rc <= 0)
1143 return width;
1144 }
1145#else
1146 src = mbstowc_internal(&wc, src);
1147 if (wc == ERROR_WCHAR || wc == 0)
1148 return width;
1149#endif
1150 w = wcwidth(wc);
1151 if (w < 0)
1152 return width;
1153 width -= w;
1154 if ((int)width <= 0)
1155 return 0;
1156 }
1157}
1158#endif
1159