linux/lib/string_helpers.c
<<
>>
Prefs
   1/*
   2 * Helpers for formatting and printing strings
   3 *
   4 * Copyright 31 August 2008 James Bottomley
   5 * Copyright (C) 2013, Intel Corporation
   6 */
   7#include <linux/bug.h>
   8#include <linux/kernel.h>
   9#include <linux/math64.h>
  10#include <linux/export.h>
  11#include <linux/ctype.h>
  12#include <linux/errno.h>
  13#include <linux/string.h>
  14#include <linux/string_helpers.h>
  15
  16/**
  17 * string_get_size - get the size in the specified units
  18 * @size:       The size to be converted in blocks
  19 * @blk_size:   Size of the block (use 1 for size in bytes)
  20 * @units:      units to use (powers of 1000 or 1024)
  21 * @buf:        buffer to format to
  22 * @len:        length of buffer
  23 *
  24 * This function returns a string formatted to 3 significant figures
  25 * giving the size in the required units.  @buf should have room for
  26 * at least 9 bytes and will always be zero terminated.
  27 *
  28 */
  29void string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
  30                     char *buf, int len)
  31{
  32        static const char *const units_10[] = {
  33                "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
  34        };
  35        static const char *const units_2[] = {
  36                "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
  37        };
  38        static const char *const *const units_str[] = {
  39                [STRING_UNITS_10] = units_10,
  40                [STRING_UNITS_2] = units_2,
  41        };
  42        static const unsigned int divisor[] = {
  43                [STRING_UNITS_10] = 1000,
  44                [STRING_UNITS_2] = 1024,
  45        };
  46        static const unsigned int rounding[] = { 500, 50, 5 };
  47        int i = 0, j;
  48        u32 remainder = 0, sf_cap;
  49        char tmp[8];
  50        const char *unit;
  51
  52        tmp[0] = '\0';
  53
  54        if (blk_size == 0)
  55                size = 0;
  56        if (size == 0)
  57                goto out;
  58
  59        /* This is Napier's algorithm.  Reduce the original block size to
  60         *
  61         * coefficient * divisor[units]^i
  62         *
  63         * we do the reduction so both coefficients are just under 32 bits so
  64         * that multiplying them together won't overflow 64 bits and we keep
  65         * as much precision as possible in the numbers.
  66         *
  67         * Note: it's safe to throw away the remainders here because all the
  68         * precision is in the coefficients.
  69         */
  70        while (blk_size >> 32) {
  71                do_div(blk_size, divisor[units]);
  72                i++;
  73        }
  74
  75        while (size >> 32) {
  76                do_div(size, divisor[units]);
  77                i++;
  78        }
  79
  80        /* now perform the actual multiplication keeping i as the sum of the
  81         * two logarithms */
  82        size *= blk_size;
  83
  84        /* and logarithmically reduce it until it's just under the divisor */
  85        while (size >= divisor[units]) {
  86                remainder = do_div(size, divisor[units]);
  87                i++;
  88        }
  89
  90        /* work out in j how many digits of precision we need from the
  91         * remainder */
  92        sf_cap = size;
  93        for (j = 0; sf_cap*10 < 1000; j++)
  94                sf_cap *= 10;
  95
  96        if (units == STRING_UNITS_2) {
  97                /* express the remainder as a decimal.  It's currently the
  98                 * numerator of a fraction whose denominator is
  99                 * divisor[units], which is 1 << 10 for STRING_UNITS_2 */
 100                remainder *= 1000;
 101                remainder >>= 10;
 102        }
 103
 104        /* add a 5 to the digit below what will be printed to ensure
 105         * an arithmetical round up and carry it through to size */
 106        remainder += rounding[j];
 107        if (remainder >= 1000) {
 108                remainder -= 1000;
 109                size += 1;
 110        }
 111
 112        if (j) {
 113                snprintf(tmp, sizeof(tmp), ".%03u", remainder);
 114                tmp[j+1] = '\0';
 115        }
 116
 117 out:
 118        if (i >= ARRAY_SIZE(units_2))
 119                unit = "UNK";
 120        else
 121                unit = units_str[units][i];
 122
 123        snprintf(buf, len, "%u%s %s", (u32)size,
 124                 tmp, unit);
 125}
 126EXPORT_SYMBOL(string_get_size);
 127
 128static bool unescape_space(char **src, char **dst)
 129{
 130        char *p = *dst, *q = *src;
 131
 132        switch (*q) {
 133        case 'n':
 134                *p = '\n';
 135                break;
 136        case 'r':
 137                *p = '\r';
 138                break;
 139        case 't':
 140                *p = '\t';
 141                break;
 142        case 'v':
 143                *p = '\v';
 144                break;
 145        case 'f':
 146                *p = '\f';
 147                break;
 148        default:
 149                return false;
 150        }
 151        *dst += 1;
 152        *src += 1;
 153        return true;
 154}
 155
 156static bool unescape_octal(char **src, char **dst)
 157{
 158        char *p = *dst, *q = *src;
 159        u8 num;
 160
 161        if (isodigit(*q) == 0)
 162                return false;
 163
 164        num = (*q++) & 7;
 165        while (num < 32 && isodigit(*q) && (q - *src < 3)) {
 166                num <<= 3;
 167                num += (*q++) & 7;
 168        }
 169        *p = num;
 170        *dst += 1;
 171        *src = q;
 172        return true;
 173}
 174
 175static bool unescape_hex(char **src, char **dst)
 176{
 177        char *p = *dst, *q = *src;
 178        int digit;
 179        u8 num;
 180
 181        if (*q++ != 'x')
 182                return false;
 183
 184        num = digit = hex_to_bin(*q++);
 185        if (digit < 0)
 186                return false;
 187
 188        digit = hex_to_bin(*q);
 189        if (digit >= 0) {
 190                q++;
 191                num = (num << 4) | digit;
 192        }
 193        *p = num;
 194        *dst += 1;
 195        *src = q;
 196        return true;
 197}
 198
 199static bool unescape_special(char **src, char **dst)
 200{
 201        char *p = *dst, *q = *src;
 202
 203        switch (*q) {
 204        case '\"':
 205                *p = '\"';
 206                break;
 207        case '\\':
 208                *p = '\\';
 209                break;
 210        case 'a':
 211                *p = '\a';
 212                break;
 213        case 'e':
 214                *p = '\e';
 215                break;
 216        default:
 217                return false;
 218        }
 219        *dst += 1;
 220        *src += 1;
 221        return true;
 222}
 223
 224/**
 225 * string_unescape - unquote characters in the given string
 226 * @src:        source buffer (escaped)
 227 * @dst:        destination buffer (unescaped)
 228 * @size:       size of the destination buffer (0 to unlimit)
 229 * @flags:      combination of the flags (bitwise OR):
 230 *      %UNESCAPE_SPACE:
 231 *              '\f' - form feed
 232 *              '\n' - new line
 233 *              '\r' - carriage return
 234 *              '\t' - horizontal tab
 235 *              '\v' - vertical tab
 236 *      %UNESCAPE_OCTAL:
 237 *              '\NNN' - byte with octal value NNN (1 to 3 digits)
 238 *      %UNESCAPE_HEX:
 239 *              '\xHH' - byte with hexadecimal value HH (1 to 2 digits)
 240 *      %UNESCAPE_SPECIAL:
 241 *              '\"' - double quote
 242 *              '\\' - backslash
 243 *              '\a' - alert (BEL)
 244 *              '\e' - escape
 245 *      %UNESCAPE_ANY:
 246 *              all previous together
 247 *
 248 * Description:
 249 * The function unquotes characters in the given string.
 250 *
 251 * Because the size of the output will be the same as or less than the size of
 252 * the input, the transformation may be performed in place.
 253 *
 254 * Caller must provide valid source and destination pointers. Be aware that
 255 * destination buffer will always be NULL-terminated. Source string must be
 256 * NULL-terminated as well.
 257 *
 258 * Return:
 259 * The amount of the characters processed to the destination buffer excluding
 260 * trailing '\0' is returned.
 261 */
 262int string_unescape(char *src, char *dst, size_t size, unsigned int flags)
 263{
 264        char *out = dst;
 265
 266        while (*src && --size) {
 267                if (src[0] == '\\' && src[1] != '\0' && size > 1) {
 268                        src++;
 269                        size--;
 270
 271                        if (flags & UNESCAPE_SPACE &&
 272                                        unescape_space(&src, &out))
 273                                continue;
 274
 275                        if (flags & UNESCAPE_OCTAL &&
 276                                        unescape_octal(&src, &out))
 277                                continue;
 278
 279                        if (flags & UNESCAPE_HEX &&
 280                                        unescape_hex(&src, &out))
 281                                continue;
 282
 283                        if (flags & UNESCAPE_SPECIAL &&
 284                                        unescape_special(&src, &out))
 285                                continue;
 286
 287                        *out++ = '\\';
 288                }
 289                *out++ = *src++;
 290        }
 291        *out = '\0';
 292
 293        return out - dst;
 294}
 295EXPORT_SYMBOL(string_unescape);
 296
 297static bool escape_passthrough(unsigned char c, char **dst, char *end)
 298{
 299        char *out = *dst;
 300
 301        if (out < end)
 302                *out = c;
 303        *dst = out + 1;
 304        return true;
 305}
 306
 307static bool escape_space(unsigned char c, char **dst, char *end)
 308{
 309        char *out = *dst;
 310        unsigned char to;
 311
 312        switch (c) {
 313        case '\n':
 314                to = 'n';
 315                break;
 316        case '\r':
 317                to = 'r';
 318                break;
 319        case '\t':
 320                to = 't';
 321                break;
 322        case '\v':
 323                to = 'v';
 324                break;
 325        case '\f':
 326                to = 'f';
 327                break;
 328        default:
 329                return false;
 330        }
 331
 332        if (out < end)
 333                *out = '\\';
 334        ++out;
 335        if (out < end)
 336                *out = to;
 337        ++out;
 338
 339        *dst = out;
 340        return true;
 341}
 342
 343static bool escape_special(unsigned char c, char **dst, char *end)
 344{
 345        char *out = *dst;
 346        unsigned char to;
 347
 348        switch (c) {
 349        case '\\':
 350                to = '\\';
 351                break;
 352        case '\a':
 353                to = 'a';
 354                break;
 355        case '\e':
 356                to = 'e';
 357                break;
 358        default:
 359                return false;
 360        }
 361
 362        if (out < end)
 363                *out = '\\';
 364        ++out;
 365        if (out < end)
 366                *out = to;
 367        ++out;
 368
 369        *dst = out;
 370        return true;
 371}
 372
 373static bool escape_null(unsigned char c, char **dst, char *end)
 374{
 375        char *out = *dst;
 376
 377        if (c)
 378                return false;
 379
 380        if (out < end)
 381                *out = '\\';
 382        ++out;
 383        if (out < end)
 384                *out = '0';
 385        ++out;
 386
 387        *dst = out;
 388        return true;
 389}
 390
 391static bool escape_octal(unsigned char c, char **dst, char *end)
 392{
 393        char *out = *dst;
 394
 395        if (out < end)
 396                *out = '\\';
 397        ++out;
 398        if (out < end)
 399                *out = ((c >> 6) & 0x07) + '0';
 400        ++out;
 401        if (out < end)
 402                *out = ((c >> 3) & 0x07) + '0';
 403        ++out;
 404        if (out < end)
 405                *out = ((c >> 0) & 0x07) + '0';
 406        ++out;
 407
 408        *dst = out;
 409        return true;
 410}
 411
 412static bool escape_hex(unsigned char c, char **dst, char *end)
 413{
 414        char *out = *dst;
 415
 416        if (out < end)
 417                *out = '\\';
 418        ++out;
 419        if (out < end)
 420                *out = 'x';
 421        ++out;
 422        if (out < end)
 423                *out = hex_asc_hi(c);
 424        ++out;
 425        if (out < end)
 426                *out = hex_asc_lo(c);
 427        ++out;
 428
 429        *dst = out;
 430        return true;
 431}
 432
 433/**
 434 * string_escape_mem - quote characters in the given memory buffer
 435 * @src:        source buffer (unescaped)
 436 * @isz:        source buffer size
 437 * @dst:        destination buffer (escaped)
 438 * @osz:        destination buffer size
 439 * @flags:      combination of the flags (bitwise OR):
 440 *      %ESCAPE_SPACE: (special white space, not space itself)
 441 *              '\f' - form feed
 442 *              '\n' - new line
 443 *              '\r' - carriage return
 444 *              '\t' - horizontal tab
 445 *              '\v' - vertical tab
 446 *      %ESCAPE_SPECIAL:
 447 *              '\\' - backslash
 448 *              '\a' - alert (BEL)
 449 *              '\e' - escape
 450 *      %ESCAPE_NULL:
 451 *              '\0' - null
 452 *      %ESCAPE_OCTAL:
 453 *              '\NNN' - byte with octal value NNN (3 digits)
 454 *      %ESCAPE_ANY:
 455 *              all previous together
 456 *      %ESCAPE_NP:
 457 *              escape only non-printable characters (checked by isprint)
 458 *      %ESCAPE_ANY_NP:
 459 *              all previous together
 460 *      %ESCAPE_HEX:
 461 *              '\xHH' - byte with hexadecimal value HH (2 digits)
 462 * @only:       NULL-terminated string containing characters used to limit
 463 *              the selected escape class. If characters are included in @only
 464 *              that would not normally be escaped by the classes selected
 465 *              in @flags, they will be copied to @dst unescaped.
 466 *
 467 * Description:
 468 * The process of escaping byte buffer includes several parts. They are applied
 469 * in the following sequence.
 470 *      1. The character is matched to the printable class, if asked, and in
 471 *         case of match it passes through to the output.
 472 *      2. The character is not matched to the one from @only string and thus
 473 *         must go as-is to the output.
 474 *      3. The character is checked if it falls into the class given by @flags.
 475 *         %ESCAPE_OCTAL and %ESCAPE_HEX are going last since they cover any
 476 *         character. Note that they actually can't go together, otherwise
 477 *         %ESCAPE_HEX will be ignored.
 478 *
 479 * Caller must provide valid source and destination pointers. Be aware that
 480 * destination buffer will not be NULL-terminated, thus caller have to append
 481 * it if needs.
 482 *
 483 * Return:
 484 * The total size of the escaped output that would be generated for
 485 * the given input and flags. To check whether the output was
 486 * truncated, compare the return value to osz. There is room left in
 487 * dst for a '\0' terminator if and only if ret < osz.
 488 */
 489int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
 490                      unsigned int flags, const char *only)
 491{
 492        char *p = dst;
 493        char *end = p + osz;
 494        bool is_dict = only && *only;
 495
 496        while (isz--) {
 497                unsigned char c = *src++;
 498
 499                /*
 500                 * Apply rules in the following sequence:
 501                 *      - the character is printable, when @flags has
 502                 *        %ESCAPE_NP bit set
 503                 *      - the @only string is supplied and does not contain a
 504                 *        character under question
 505                 *      - the character doesn't fall into a class of symbols
 506                 *        defined by given @flags
 507                 * In these cases we just pass through a character to the
 508                 * output buffer.
 509                 */
 510                if ((flags & ESCAPE_NP && isprint(c)) ||
 511                    (is_dict && !strchr(only, c))) {
 512                        /* do nothing */
 513                } else {
 514                        if (flags & ESCAPE_SPACE && escape_space(c, &p, end))
 515                                continue;
 516
 517                        if (flags & ESCAPE_SPECIAL && escape_special(c, &p, end))
 518                                continue;
 519
 520                        if (flags & ESCAPE_NULL && escape_null(c, &p, end))
 521                                continue;
 522
 523                        /* ESCAPE_OCTAL and ESCAPE_HEX always go last */
 524                        if (flags & ESCAPE_OCTAL && escape_octal(c, &p, end))
 525                                continue;
 526
 527                        if (flags & ESCAPE_HEX && escape_hex(c, &p, end))
 528                                continue;
 529                }
 530
 531                escape_passthrough(c, &p, end);
 532        }
 533
 534        return p - dst;
 535}
 536EXPORT_SYMBOL(string_escape_mem);
 537