linux/lib/glob.c
<<
>>
Prefs
   1#include <linux/module.h>
   2#include <linux/glob.h>
   3
   4/*
   5 * The only reason this code can be compiled as a module is because the
   6 * ATA code that depends on it can be as well.  In practice, they're
   7 * both usually compiled in and the module overhead goes away.
   8 */
   9MODULE_DESCRIPTION("glob(7) matching");
  10MODULE_LICENSE("Dual MIT/GPL");
  11
  12/**
  13 * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
  14 * @pat: Shell-style pattern to match, e.g. "*.[ch]".
  15 * @str: String to match.  The pattern must match the entire string.
  16 *
  17 * Perform shell-style glob matching, returning true (1) if the match
  18 * succeeds, or false (0) if it fails.  Equivalent to !fnmatch(@pat, @str, 0).
  19 *
  20 * Pattern metacharacters are ?, *, [ and \.
  21 * (And, inside character classes, !, - and ].)
  22 *
  23 * This is small and simple implementation intended for device blacklists
  24 * where a string is matched against a number of patterns.  Thus, it
  25 * does not preprocess the patterns.  It is non-recursive, and run-time
  26 * is at most quadratic: strlen(@str)*strlen(@pat).
  27 *
  28 * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
  29 * it takes 6 passes over the pattern before matching the string.
  30 *
  31 * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
  32 * treat / or leading . specially; it isn't actually used for pathnames.
  33 *
  34 * Note that according to glob(7) (and unlike bash), character classes
  35 * are complemented by a leading !; this does not support the regex-style
  36 * [^a-z] syntax.
  37 *
  38 * An opening bracket without a matching close is matched literally.
  39 */
  40bool __pure glob_match(char const *pat, char const *str)
  41{
  42        /*
  43         * Backtrack to previous * on mismatch and retry starting one
  44         * character later in the string.  Because * matches all characters
  45         * (no exception for /), it can be easily proved that there's
  46         * never a need to backtrack multiple levels.
  47         */
  48        char const *back_pat = NULL, *back_str = back_str;
  49
  50        /*
  51         * Loop over each token (character or class) in pat, matching
  52         * it against the remaining unmatched tail of str.  Return false
  53         * on mismatch, or true after matching the trailing nul bytes.
  54         */
  55        for (;;) {
  56                unsigned char c = *str++;
  57                unsigned char d = *pat++;
  58
  59                switch (d) {
  60                case '?':       /* Wildcard: anything but nul */
  61                        if (c == '\0')
  62                                return false;
  63                        break;
  64                case '*':       /* Any-length wildcard */
  65                        if (*pat == '\0')       /* Optimize trailing * case */
  66                                return true;
  67                        back_pat = pat;
  68                        back_str = --str;       /* Allow zero-length match */
  69                        break;
  70                case '[': {     /* Character class */
  71                        bool match = false, inverted = (*pat == '!');
  72                        char const *class = pat + inverted;
  73                        unsigned char a = *class++;
  74
  75                        /*
  76                         * Iterate over each span in the character class.
  77                         * A span is either a single character a, or a
  78                         * range a-b.  The first span may begin with ']'.
  79                         */
  80                        do {
  81                                unsigned char b = a;
  82
  83                                if (a == '\0')  /* Malformed */
  84                                        goto literal;
  85
  86                                if (class[0] == '-' && class[1] != ']') {
  87                                        b = class[1];
  88
  89                                        if (b == '\0')
  90                                                goto literal;
  91
  92                                        class += 2;
  93                                        /* Any special action if a > b? */
  94                                }
  95                                match |= (a <= c && c <= b);
  96                        } while ((a = *class++) != ']');
  97
  98                        if (match == inverted)
  99                                goto backtrack;
 100                        pat = class;
 101                        }
 102                        break;
 103                case '\\':
 104                        d = *pat++;
 105                        /*FALLTHROUGH*/
 106                default:        /* Literal character */
 107literal:
 108                        if (c == d) {
 109                                if (d == '\0')
 110                                        return true;
 111                                break;
 112                        }
 113backtrack:
 114                        if (c == '\0' || !back_pat)
 115                                return false;   /* No point continuing */
 116                        /* Try again from last *, one character later in str. */
 117                        pat = back_pat;
 118                        str = ++back_str;
 119                        break;
 120                }
 121        }
 122}
 123EXPORT_SYMBOL(glob_match);
 124
 125
 126#ifdef CONFIG_GLOB_SELFTEST
 127
 128#include <linux/printk.h>
 129#include <linux/moduleparam.h>
 130
 131/* Boot with "glob.verbose=1" to show successful tests, too */
 132static bool verbose = false;
 133module_param(verbose, bool, 0);
 134
 135struct glob_test {
 136        char const *pat, *str;
 137        bool expected;
 138};
 139
 140static bool __pure __init test(char const *pat, char const *str, bool expected)
 141{
 142        bool match = glob_match(pat, str);
 143        bool success = match == expected;
 144
 145        /* Can't get string literals into a particular section, so... */
 146        static char const msg_error[] __initconst =
 147                KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n";
 148        static char const msg_ok[] __initconst =
 149                KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n";
 150        static char const mismatch[] __initconst = "mismatch";
 151        char const *message;
 152
 153        if (!success)
 154                message = msg_error;
 155        else if (verbose)
 156                message = msg_ok;
 157        else
 158                return success;
 159
 160        printk(message, pat, str, mismatch + 3*match);
 161        return success;
 162}
 163
 164/*
 165 * The tests are all jammed together in one array to make it simpler
 166 * to place that array in the .init.rodata section.  The obvious
 167 * "array of structures containing char *" has no way to force the
 168 * pointed-to strings to be in a particular section.
 169 *
 170 * Anyway, a test consists of:
 171 * 1. Expected glob_match result: '1' or '0'.
 172 * 2. Pattern to match: null-terminated string
 173 * 3. String to match against: null-terminated string
 174 *
 175 * The list of tests is terminated with a final '\0' instead of
 176 * a glob_match result character.
 177 */
 178static char const glob_tests[] __initconst =
 179        /* Some basic tests */
 180        "1" "a\0" "a\0"
 181        "0" "a\0" "b\0"
 182        "0" "a\0" "aa\0"
 183        "0" "a\0" "\0"
 184        "1" "\0" "\0"
 185        "0" "\0" "a\0"
 186        /* Simple character class tests */
 187        "1" "[a]\0" "a\0"
 188        "0" "[a]\0" "b\0"
 189        "0" "[!a]\0" "a\0"
 190        "1" "[!a]\0" "b\0"
 191        "1" "[ab]\0" "a\0"
 192        "1" "[ab]\0" "b\0"
 193        "0" "[ab]\0" "c\0"
 194        "1" "[!ab]\0" "c\0"
 195        "1" "[a-c]\0" "b\0"
 196        "0" "[a-c]\0" "d\0"
 197        /* Corner cases in character class parsing */
 198        "1" "[a-c-e-g]\0" "-\0"
 199        "0" "[a-c-e-g]\0" "d\0"
 200        "1" "[a-c-e-g]\0" "f\0"
 201        "1" "[]a-ceg-ik[]\0" "a\0"
 202        "1" "[]a-ceg-ik[]\0" "]\0"
 203        "1" "[]a-ceg-ik[]\0" "[\0"
 204        "1" "[]a-ceg-ik[]\0" "h\0"
 205        "0" "[]a-ceg-ik[]\0" "f\0"
 206        "0" "[!]a-ceg-ik[]\0" "h\0"
 207        "0" "[!]a-ceg-ik[]\0" "]\0"
 208        "1" "[!]a-ceg-ik[]\0" "f\0"
 209        /* Simple wild cards */
 210        "1" "?\0" "a\0"
 211        "0" "?\0" "aa\0"
 212        "0" "??\0" "a\0"
 213        "1" "?x?\0" "axb\0"
 214        "0" "?x?\0" "abx\0"
 215        "0" "?x?\0" "xab\0"
 216        /* Asterisk wild cards (backtracking) */
 217        "0" "*??\0" "a\0"
 218        "1" "*??\0" "ab\0"
 219        "1" "*??\0" "abc\0"
 220        "1" "*??\0" "abcd\0"
 221        "0" "??*\0" "a\0"
 222        "1" "??*\0" "ab\0"
 223        "1" "??*\0" "abc\0"
 224        "1" "??*\0" "abcd\0"
 225        "0" "?*?\0" "a\0"
 226        "1" "?*?\0" "ab\0"
 227        "1" "?*?\0" "abc\0"
 228        "1" "?*?\0" "abcd\0"
 229        "1" "*b\0" "b\0"
 230        "1" "*b\0" "ab\0"
 231        "0" "*b\0" "ba\0"
 232        "1" "*b\0" "bb\0"
 233        "1" "*b\0" "abb\0"
 234        "1" "*b\0" "bab\0"
 235        "1" "*bc\0" "abbc\0"
 236        "1" "*bc\0" "bc\0"
 237        "1" "*bc\0" "bbc\0"
 238        "1" "*bc\0" "bcbc\0"
 239        /* Multiple asterisks (complex backtracking) */
 240        "1" "*ac*\0" "abacadaeafag\0"
 241        "1" "*ac*ae*ag*\0" "abacadaeafag\0"
 242        "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0"
 243        "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0"
 244        "1" "*abcd*\0" "abcabcabcabcdefg\0"
 245        "1" "*ab*cd*\0" "abcabcabcabcdefg\0"
 246        "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0"
 247        "0" "*abcd*\0" "abcabcabcabcefg\0"
 248        "0" "*ab*cd*\0" "abcabcabcabcefg\0";
 249
 250static int __init glob_init(void)
 251{
 252        unsigned successes = 0;
 253        unsigned n = 0;
 254        char const *p = glob_tests;
 255        static char const message[] __initconst =
 256                KERN_INFO "glob: %u self-tests passed, %u failed\n";
 257
 258        /*
 259         * Tests are jammed together in a string.  The first byte is '1'
 260         * or '0' to indicate the expected outcome, or '\0' to indicate the
 261         * end of the tests.  Then come two null-terminated strings: the
 262         * pattern and the string to match it against.
 263         */
 264        while (*p) {
 265                bool expected = *p++ & 1;
 266                char const *pat = p;
 267
 268                p += strlen(p) + 1;
 269                successes += test(pat, p, expected);
 270                p += strlen(p) + 1;
 271                n++;
 272        }
 273
 274        n -= successes;
 275        printk(message, successes, n);
 276
 277        /* What's the errno for "kernel bug detected"?  Guess... */
 278        return n ? -ECANCELED : 0;
 279}
 280
 281/* We need a dummy exit function to allow unload */
 282static void __exit glob_fini(void) { }
 283
 284module_init(glob_init);
 285module_exit(glob_fini);
 286
 287#endif /* CONFIG_GLOB_SELFTEST */
 288