busybox/libbb/get_line_from_file.c
<<
>>
Prefs
   1/* vi: set sw=4 ts=4: */
   2/*
   3 * Utility routines.
   4 *
   5 * Copyright (C) 2005, 2006 Rob Landley <rob@landley.net>
   6 * Copyright (C) 2004 Erik Andersen <andersen@codepoet.org>
   7 * Copyright (C) 2001 Matt Krai
   8 *
   9 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
  10 */
  11
  12#include "libbb.h"
  13
  14/* This function reads an entire line from a text file, up to a newline
  15 * or NUL byte, inclusive.  It returns a malloc'ed char * which
  16 * must be free'ed by the caller.  If end is NULL '\n' isn't considered
  17 * end of line.  If end isn't NULL, length of the chunk is stored in it.
  18 * If lineno is not NULL, *lineno is incremented for each line,
  19 * and also trailing '\' is recognized as line continuation.
  20 *
  21 * Returns NULL if EOF/error. */
  22char* FAST_FUNC bb_get_chunk_with_continuation(FILE *file, int *end, int *lineno)
  23{
  24        int ch;
  25        int idx = 0;
  26        char *linebuf = NULL;
  27        int linebufsz = 0;
  28
  29        while ((ch = getc(file)) != EOF) {
  30                /* grow the line buffer as necessary */
  31                if (idx >= linebufsz) {
  32                        linebufsz += 256;
  33                        linebuf = xrealloc(linebuf, linebufsz);
  34                }
  35                linebuf[idx++] = (char) ch;
  36                if (!ch)
  37                        break;
  38                if (end && ch == '\n') {
  39                        if (lineno == NULL)
  40                                break;
  41                        (*lineno)++;
  42                        if (idx < 2 || linebuf[idx-2] != '\\')
  43                                break;
  44                        idx -= 2;
  45                }
  46        }
  47        if (end)
  48                *end = idx;
  49        if (linebuf) {
  50                // huh, does fgets discard prior data on error like this?
  51                // I don't think so....
  52                //if (ferror(file)) {
  53                //      free(linebuf);
  54                //      return NULL;
  55                //}
  56                linebuf = xrealloc(linebuf, idx + 1);
  57                linebuf[idx] = '\0';
  58        }
  59        return linebuf;
  60}
  61
  62char* FAST_FUNC bb_get_chunk_from_file(FILE *file, int *end)
  63{
  64        return bb_get_chunk_with_continuation(file, end, NULL);
  65}
  66
  67/* Get line, including trailing \n if any */
  68char* FAST_FUNC xmalloc_fgets(FILE *file)
  69{
  70        int i;
  71
  72        return bb_get_chunk_from_file(file, &i);
  73}
  74/* Get line.  Remove trailing \n */
  75char* FAST_FUNC xmalloc_fgetline(FILE *file)
  76{
  77        int i;
  78        char *c = bb_get_chunk_from_file(file, &i);
  79
  80        if (i && c[--i] == '\n')
  81                c[i] = '\0';
  82
  83        return c;
  84}
  85
  86#if 0
  87/* GNUism getline() should be faster (not tested) than a loop with fgetc */
  88
  89/* Get line, including trailing \n if any */
  90char* FAST_FUNC xmalloc_fgets(FILE *file)
  91{
  92        char *res_buf = NULL;
  93        size_t res_sz;
  94
  95        if (getline(&res_buf, &res_sz, file) == -1) {
  96                free(res_buf); /* uclibc allocates a buffer even on EOF. WTF? */
  97                res_buf = NULL;
  98        }
  99//TODO: trimming to res_sz?
 100        return res_buf;
 101}
 102/* Get line.  Remove trailing \n */
 103char* FAST_FUNC xmalloc_fgetline(FILE *file)
 104{
 105        char *res_buf = NULL;
 106        size_t res_sz;
 107
 108        res_sz = getline(&res_buf, &res_sz, file);
 109
 110        if ((ssize_t)res_sz != -1) {
 111                if (res_buf[res_sz - 1] == '\n')
 112                        res_buf[--res_sz] = '\0';
 113//TODO: trimming to res_sz?
 114        } else {
 115                free(res_buf); /* uclibc allocates a buffer even on EOF. WTF? */
 116                res_buf = NULL;
 117        }
 118        return res_buf;
 119}
 120
 121#endif
 122
 123#if 0
 124/* Faster routines (~twice as fast). +170 bytes. Unused as of 2008-07.
 125 *
 126 * NB: they stop at NUL byte too.
 127 * Performance is important here. Think "grep 50gigabyte_file"...
 128 * Ironically, grep can't use it because of NUL issue.
 129 * We sorely need C lib to provide fgets which reports size!
 130 *
 131 * Update:
 132 * Actually, uclibc and glibc have it. man getline. It's GNUism,
 133 *   but very useful one (if it's as fast as this code).
 134 * TODO:
 135 * - currently, sed and sort use bb_get_chunk_from_file and heavily
 136 *   depend on its "stop on \n or \0" behavior, and STILL they fail
 137 *   to handle all cases with embedded NULs correctly. So:
 138 * - audit sed and sort; convert them to getline FIRST.
 139 * - THEN ditch bb_get_chunk_from_file, replace it with getline.
 140 * - provide getline implementation for non-GNU systems.
 141 */
 142
 143static char* xmalloc_fgets_internal(FILE *file, int *sizep)
 144{
 145        int len;
 146        int idx = 0;
 147        char *linebuf = NULL;
 148
 149        while (1) {
 150                char *r;
 151
 152                linebuf = xrealloc(linebuf, idx + 0x100);
 153                r = fgets(&linebuf[idx], 0x100, file);
 154                if (!r) {
 155                        /* need to terminate in case this is error
 156                         * (EOF puts NUL itself) */
 157                        linebuf[idx] = '\0';
 158                        break;
 159                }
 160                /* stupid. fgets knows the len, it should report it somehow */
 161                len = strlen(&linebuf[idx]);
 162                idx += len;
 163                if (len != 0xff || linebuf[idx - 1] == '\n')
 164                        break;
 165        }
 166        *sizep = idx;
 167        if (idx) {
 168                /* xrealloc(linebuf, idx + 1) is up to caller */
 169                return linebuf;
 170        }
 171        free(linebuf);
 172        return NULL;
 173}
 174
 175/* Get line, remove trailing \n */
 176char* FAST_FUNC xmalloc_fgetline_fast(FILE *file)
 177{
 178        int sz;
 179        char *r = xmalloc_fgets_internal(file, &sz);
 180        if (r && r[sz - 1] == '\n')
 181                r[--sz] = '\0';
 182        return r; /* not xrealloc(r, sz + 1)! */
 183}
 184
 185char* FAST_FUNC xmalloc_fgets(FILE *file)
 186{
 187        int sz;
 188        return xmalloc_fgets_internal(file, &sz);
 189}
 190
 191/* Get line, remove trailing \n */
 192char* FAST_FUNC xmalloc_fgetline(FILE *file)
 193{
 194        int sz;
 195        char *r = xmalloc_fgets_internal(file, &sz);
 196        if (!r)
 197                return r;
 198        if (r[sz - 1] == '\n')
 199                r[--sz] = '\0';
 200        return xrealloc(r, sz + 1);
 201}
 202#endif
 203