linux/lib/ts_kmp.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * lib/ts_kmp.c         Knuth-Morris-Pratt text search implementation
   4 *
   5 * Authors:     Thomas Graf <tgraf@suug.ch>
   6 *
   7 * ==========================================================================
   8 * 
   9 *   Implements a linear-time string-matching algorithm due to Knuth,
  10 *   Morris, and Pratt [1]. Their algorithm avoids the explicit
  11 *   computation of the transition function DELTA altogether. Its
  12 *   matching time is O(n), for n being length(text), using just an
  13 *   auxiliary function PI[1..m], for m being length(pattern),
  14 *   precomputed from the pattern in time O(m). The array PI allows
  15 *   the transition function DELTA to be computed efficiently
  16 *   "on the fly" as needed. Roughly speaking, for any state
  17 *   "q" = 0,1,...,m and any character "a" in SIGMA, the value
  18 *   PI["q"] contains the information that is independent of "a" and
  19 *   is needed to compute DELTA("q", "a") [2]. Since the array PI
  20 *   has only m entries, whereas DELTA has O(m|SIGMA|) entries, we
  21 *   save a factor of |SIGMA| in the preprocessing time by computing
  22 *   PI rather than DELTA.
  23 *
  24 *   [1] Cormen, Leiserson, Rivest, Stein
  25 *       Introdcution to Algorithms, 2nd Edition, MIT Press
  26 *   [2] See finite automaton theory
  27 */
  28
  29#include <linux/module.h>
  30#include <linux/types.h>
  31#include <linux/string.h>
  32#include <linux/ctype.h>
  33#include <linux/textsearch.h>
  34
  35struct ts_kmp
  36{
  37        u8 *            pattern;
  38        unsigned int    pattern_len;
  39        unsigned int    prefix_tbl[0];
  40};
  41
  42static unsigned int kmp_find(struct ts_config *conf, struct ts_state *state)
  43{
  44        struct ts_kmp *kmp = ts_config_priv(conf);
  45        unsigned int i, q = 0, text_len, consumed = state->offset;
  46        const u8 *text;
  47        const int icase = conf->flags & TS_IGNORECASE;
  48
  49        for (;;) {
  50                text_len = conf->get_next_block(consumed, &text, conf, state);
  51
  52                if (unlikely(text_len == 0))
  53                        break;
  54
  55                for (i = 0; i < text_len; i++) {
  56                        while (q > 0 && kmp->pattern[q]
  57                            != (icase ? toupper(text[i]) : text[i]))
  58                                q = kmp->prefix_tbl[q - 1];
  59                        if (kmp->pattern[q]
  60                            == (icase ? toupper(text[i]) : text[i]))
  61                                q++;
  62                        if (unlikely(q == kmp->pattern_len)) {
  63                                state->offset = consumed + i + 1;
  64                                return state->offset - kmp->pattern_len;
  65                        }
  66                }
  67
  68                consumed += text_len;
  69        }
  70
  71        return UINT_MAX;
  72}
  73
  74static inline void compute_prefix_tbl(const u8 *pattern, unsigned int len,
  75                                      unsigned int *prefix_tbl, int flags)
  76{
  77        unsigned int k, q;
  78        const u8 icase = flags & TS_IGNORECASE;
  79
  80        for (k = 0, q = 1; q < len; q++) {
  81                while (k > 0 && (icase ? toupper(pattern[k]) : pattern[k])
  82                    != (icase ? toupper(pattern[q]) : pattern[q]))
  83                        k = prefix_tbl[k-1];
  84                if ((icase ? toupper(pattern[k]) : pattern[k])
  85                    == (icase ? toupper(pattern[q]) : pattern[q]))
  86                        k++;
  87                prefix_tbl[q] = k;
  88        }
  89}
  90
  91static struct ts_config *kmp_init(const void *pattern, unsigned int len,
  92                                  gfp_t gfp_mask, int flags)
  93{
  94        struct ts_config *conf;
  95        struct ts_kmp *kmp;
  96        int i;
  97        unsigned int prefix_tbl_len = len * sizeof(unsigned int);
  98        size_t priv_size = sizeof(*kmp) + len + prefix_tbl_len;
  99
 100        conf = alloc_ts_config(priv_size, gfp_mask);
 101        if (IS_ERR(conf))
 102                return conf;
 103
 104        conf->flags = flags;
 105        kmp = ts_config_priv(conf);
 106        kmp->pattern_len = len;
 107        compute_prefix_tbl(pattern, len, kmp->prefix_tbl, flags);
 108        kmp->pattern = (u8 *) kmp->prefix_tbl + prefix_tbl_len;
 109        if (flags & TS_IGNORECASE)
 110                for (i = 0; i < len; i++)
 111                        kmp->pattern[i] = toupper(((u8 *)pattern)[i]);
 112        else
 113                memcpy(kmp->pattern, pattern, len);
 114
 115        return conf;
 116}
 117
 118static void *kmp_get_pattern(struct ts_config *conf)
 119{
 120        struct ts_kmp *kmp = ts_config_priv(conf);
 121        return kmp->pattern;
 122}
 123
 124static unsigned int kmp_get_pattern_len(struct ts_config *conf)
 125{
 126        struct ts_kmp *kmp = ts_config_priv(conf);
 127        return kmp->pattern_len;
 128}
 129
 130static struct ts_ops kmp_ops = {
 131        .name             = "kmp",
 132        .find             = kmp_find,
 133        .init             = kmp_init,
 134        .get_pattern      = kmp_get_pattern,
 135        .get_pattern_len  = kmp_get_pattern_len,
 136        .owner            = THIS_MODULE,
 137        .list             = LIST_HEAD_INIT(kmp_ops.list)
 138};
 139
 140static int __init init_kmp(void)
 141{
 142        return textsearch_register(&kmp_ops);
 143}
 144
 145static void __exit exit_kmp(void)
 146{
 147        textsearch_unregister(&kmp_ops);
 148}
 149
 150MODULE_LICENSE("GPL");
 151
 152module_init(init_kmp);
 153module_exit(exit_kmp);
 154