2 +++ b/include/linux/netfilter_ipv4/ipt_string.h
7 +#define IPT_STRING_MAX_PATTERN_SIZE 128
8 +#define IPT_STRING_MAX_ALGO_NAME_SIZE 16
10 +struct ipt_string_info
12 + u_int16_t from_offset;
13 + u_int16_t to_offset;
14 + char algo[IPT_STRING_MAX_ALGO_NAME_SIZE];
15 + char pattern[IPT_STRING_MAX_PATTERN_SIZE];
18 + struct ts_config __attribute__((aligned(8))) *config;
21 +#endif /*_IPT_STRING_H*/
22 --- a/net/ipv4/netfilter/Config.in
23 +++ b/net/ipv4/netfilter/Config.in
24 @@ -61,6 +61,7 @@ if [ "$CONFIG_IP_NF_IPTABLES" != "n" ];
26 if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
27 dep_tristate ' Unclean match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_UNCLEAN $CONFIG_IP_NF_IPTABLES
28 + dep_tristate ' String match support (EXPERIMENTAL) ' CONFIG_IP_NF_MATCH_STRING $CONFIG_IP_NF_IPTABLES
29 dep_tristate ' Owner match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_OWNER $CONFIG_IP_NF_IPTABLES
30 dep_tristate ' Layer 7 match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_LAYER7 $CONFIG_IP_NF_CONNTRACK
31 dep_mbool ' Layer 7 debugging output (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_LAYER7_DEBUG $CONFIG_IP_NF_MATCH_LAYER7
33 +++ b/net/ipv4/netfilter/ipt_string.c
35 +/* String matching match for iptables
37 + * (C) 2005 Pablo Neira Ayuso <pablo@eurodev.net>
39 + * This program is free software; you can redistribute it and/or modify
40 + * it under the terms of the GNU General Public License version 2 as
41 + * published by the Free Software Foundation.
44 +#include <linux/init.h>
45 +#include <linux/module.h>
46 +#include <linux/kernel.h>
47 +#include <linux/skbuff.h>
48 +#include <linux/netfilter_ipv4/ip_tables.h>
49 +#include <linux/netfilter_ipv4/ipt_string.h>
50 +#include "textsearch/textsearch.h"
51 +#include "textsearch/textsearch.c"
52 +#include "textsearch/ts_bm.c"
53 +#include "textsearch/ts_kmp.c"
55 +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@eurodev.net>");
56 +MODULE_DESCRIPTION("IP tables string match module");
57 +MODULE_LICENSE("GPL");
59 +static int match(const struct sk_buff *skb,
60 + const struct net_device *in,
61 + const struct net_device *out,
62 + const void *matchinfo,
66 + struct iphdr *ip = skb->nh.iph;
67 + struct ts_state state;
68 + struct ipt_string_info *conf = (struct ipt_string_info *) matchinfo;
69 + char *buf = (char *)ip+(ip->ihl*4);
70 + int len = ntohs(ip->tot_len)-(ip->ihl*4);
72 + memset(&state, 0, sizeof(struct ts_state));
74 + return (textsearch_find_continuous(conf->config, &state, buf, len) != UINT_MAX) ^ !!conf->invert; /* XOR, not "&& !invert": an inverted rule must match exactly when the pattern is absent */
77 +#define STRING_TEXT_PRIV(m) ((struct ipt_string_info *) m)
79 +static int checkentry(const char *tablename,
80 + const struct ipt_ip *ip,
82 + unsigned int matchsize,
83 + unsigned int hook_mask)
85 + struct ipt_string_info *conf = matchinfo;
86 + struct ts_config *ts_conf;
88 + if (matchsize != IPT_ALIGN(sizeof(struct ipt_string_info)))
91 + /* Damn, can't handle this case properly with iptables... */
92 + if (conf->from_offset > conf->to_offset)
95 + ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen,
96 + GFP_KERNEL, TS_AUTOLOAD);
97 + if (IS_ERR(ts_conf))
100 + conf->config = ts_conf;
105 +static void destroy(void *matchinfo, unsigned int matchsize)
107 + textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config);
110 +static struct ipt_match string_match = {
113 + .checkentry = checkentry,
114 + .destroy = destroy,
118 +static int __init init(void)
122 + return ipt_register_match(&string_match);
125 +static void __exit fini(void)
129 + ipt_unregister_match(&string_match);
134 --- a/net/ipv4/netfilter/Makefile
135 +++ b/net/ipv4/netfilter/Makefile
136 @@ -99,6 +99,7 @@ obj-$(CONFIG_IP_NF_MATCH_STATE) += ipt_s
137 obj-$(CONFIG_IP_NF_MATCH_CONNMARK) += ipt_connmark.o
138 obj-$(CONFIG_IP_NF_MATCH_CONNTRACK) += ipt_conntrack.o
139 obj-$(CONFIG_IP_NF_MATCH_UNCLEAN) += ipt_unclean.o
140 +obj-$(CONFIG_IP_NF_MATCH_STRING) += ipt_string.o
141 obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o
142 obj-$(CONFIG_IP_NF_MATCH_LAYER7) += ipt_layer7.o
145 +++ b/net/ipv4/netfilter/textsearch/textsearch.c
148 + * lib/textsearch.c Generic text search interface
150 + * This program is free software; you can redistribute it and/or
151 + * modify it under the terms of the GNU General Public License
152 + * as published by the Free Software Foundation; either version
153 + * 2 of the License, or (at your option) any later version.
155 + * Authors: Thomas Graf <tgraf@suug.ch>
156 + * Pablo Neira Ayuso <pablo@eurodev.net>
158 + * ==========================================================================
162 + * The textsearch infrastructure provides text searching facilities for
163 + * both linear and non-linear data. Individual search algorithms are
164 + * implemented in modules and chosen by the user.
169 + * +----------------+
170 + * | finish()|<--------------(6)-----------------+
171 + * |get_next_block()|<--------------(5)---------------+ |
172 + * | | Algorithm | |
173 + * | | +------------------------------+
174 + * | | | init() find() destroy() |
175 + * | | +------------------------------+
176 + * | | Core API ^ ^ ^
177 + * | | +---------------+ (2) (4) (8)
178 + * | (1)|----->| prepare() |---+ | |
179 + * | (3)|----->| find()/next() |-----------+ |
180 + * | (7)|----->| destroy() |----------------------+
181 + * +----------------+ +---------------+
183 + * (1) User configures a search by calling _prepare() specifying the
184 + * search parameters such as the pattern and algorithm name.
185 + * (2) Core requests the algorithm to allocate and initialize a search
186 + * configuration according to the specified parameters.
187 + * (3) User starts the search(es) by calling _find() or _next() to
188 + * fetch subsequent occurrences. A state variable is provided
189 + * to the algorithm to store persistent variables.
190 + * (4) Core eventually resets the search offset and forwards the find()
191 + * request to the algorithm.
192 + * (5) Algorithm calls get_next_block() provided by the user continuously
193 + * to fetch the data to be searched in block by block.
194 + * (6) Algorithm invokes finish() after the last call to get_next_block
195 + * to clean up any leftovers from get_next_block. (Optional)
196 + * (7) User destroys the configuration by calling _destroy().
197 + * (8) Core notifies the algorithm to destroy algorithm specific
198 + * allocations. (Optional)
202 + * Before a search can be performed, a configuration must be created
203 + * by calling textsearch_prepare() specifying the searching algorithm and
204 + * the pattern to look for. The returned configuration may then be used
205 + * for an arbitrary number of times and even in parallel as long as a
206 + * separate struct ts_state variable is provided to every instance.
208 + * The actual search is performed by either calling textsearch_find_-
209 + * continuous() for linear data or by providing an own get_next_block()
210 + * implementation and calling textsearch_find(). Both functions return
211 + * the position of the first occurrence of the pattern or UINT_MAX if
212 + * no match was found. Subsequent occurrences can be found by calling
213 + * textsearch_next() regardless of the linearity of the data.
215 + * Once you're done using a configuration it must be given back via
216 + * textsearch_destroy.
221 + * struct ts_config *conf;
222 + * struct ts_state state;
223 + * const char *pattern = "chicken";
224 + * const char *example = "We dance the funky chicken";
226 + * conf = textsearch_prepare("kmp", pattern, strlen(pattern),
227 + * GFP_KERNEL, TS_AUTOLOAD);
228 + * if (IS_ERR(conf)) {
229 + * err = PTR_ERR(conf);
233 + * pos = textsearch_find_continuous(conf, &state, example, strlen(example));
234 + * if (pos != UINT_MAX)
235 + * panic("Oh my god, dancing chickens at %d\n", pos);
237 + * textsearch_destroy(conf);
239 + * ==========================================================================
242 +#include <linux/config.h>
243 +#include <linux/module.h>
244 +#include <linux/types.h>
245 +#include <linux/string.h>
246 +#include <linux/init.h>
247 +#include <linux/netfilter_ipv4/lockhelp.h>
248 +#include "textsearch.h"
250 +static LIST_HEAD(ts_ops);
251 +static spinlock_t ts_mod_lock = SPIN_LOCK_UNLOCKED;
252 +static DECLARE_RWLOCK(ts_ops_lock);
254 +static inline struct ts_ops *lookup_ts_algo(const char *name)
258 + read_lock(&ts_ops_lock);
259 + list_for_each_entry(o, &ts_ops, list) {
260 + if (!strcmp(name, o->name)) {
262 + read_unlock(&ts_ops_lock);
266 + read_unlock(&ts_ops_lock);
272 + * textsearch_register - register a textsearch module
273 + * @ops: operations lookup table
275 + * This function must be called by textsearch modules to announce
276 + * their presence. The specified &@ops must have %name set to a
277 + * unique identifier and the callbacks find(), init(), get_pattern(),
278 + * and get_pattern_len() must be implemented.
280 + * Returns 0 or -EEXIST if another module has already registered
283 +int textsearch_register(struct ts_ops *ops)
288 + if (ops->name == NULL || ops->find == NULL || ops->init == NULL ||
289 + ops->get_pattern == NULL || ops->get_pattern_len == NULL)
292 + spin_lock(&ts_mod_lock);
293 + list_for_each_entry(o, &ts_ops, list) {
294 + if (!strcmp(ops->name, o->name))
298 + write_lock(&ts_ops_lock);
299 + list_add_tail(&ops->list, &ts_ops);
300 + write_unlock(&ts_ops_lock);
304 + spin_unlock(&ts_mod_lock);
309 + * textsearch_unregister - unregister a textsearch module
310 + * @ops: operations lookup table
312 + * This function must be called by textsearch modules to announce
313 + * their disappearance, for example when the module gets unloaded.
314 + * The &ops parameter must be the same as the one during the
317 + * Returns 0 on success or -ENOENT if no matching textsearch
318 + * registration was found.
320 +int textsearch_unregister(struct ts_ops *ops)
325 + spin_lock(&ts_mod_lock);
326 + list_for_each_entry(o, &ts_ops, list) {
328 + write_lock(&ts_ops_lock);
329 + list_del(&o->list);
330 + write_unlock(&ts_ops_lock);
337 + spin_unlock(&ts_mod_lock);
341 +struct ts_linear_state
347 +static unsigned int get_linear_data(unsigned int consumed, const u8 **dst,
348 + struct ts_config *conf,
349 + struct ts_state *state)
351 + struct ts_linear_state *st = (struct ts_linear_state *) state->cb;
353 + if (likely(consumed < st->len)) {
354 + *dst = st->data + consumed;
355 + return st->len - consumed;
362 + * textsearch_find_continuous - search a pattern in continuous/linear data
363 + * @conf: search configuration
364 + * @state: search state
365 + * @data: data to search in
366 + * @len: length of data
368 + * A simplified version of textsearch_find() for continuous/linear data.
369 + * Call textsearch_next() to retrieve subsequent matches.
371 + * Returns the position of first occurrence of the pattern or
372 + * UINT_MAX if no occurrence was found.
374 +unsigned int textsearch_find_continuous(struct ts_config *conf,
375 + struct ts_state *state,
376 + const void *data, unsigned int len)
378 + struct ts_linear_state *st = (struct ts_linear_state *) state->cb;
380 + conf->get_next_block = get_linear_data;
384 + return textsearch_find(conf, state);
388 + * textsearch_prepare - Prepare a search
389 + * @algo: name of search algorithm
390 + * @pattern: pattern data
391 + * @len: length of pattern
392 + * @gfp_mask: allocation mask
393 + * @flags: search flags
395 + * Looks up the search algorithm module and creates a new textsearch
396 + * configuration for the specified pattern. Upon completion all
397 + * necessary refcnts are held and the configuration must be put back
398 + * using textsearch_destroy() after usage.
400 + * Note: The format of the pattern may not be compatible between
401 + * the various search algorithms.
403 + * Returns a new textsearch configuration according to the specified
404 + * parameters or a ERR_PTR().
406 +struct ts_config *textsearch_prepare(const char *algo, const void *pattern,
407 + unsigned int len, gfp_t gfp_mask, int flags)
410 + struct ts_config *conf;
411 + struct ts_ops *ops;
413 + ops = lookup_ts_algo(algo);
418 + conf = ops->init(pattern, len, gfp_mask);
419 + if (IS_ERR(conf)) {
420 + err = PTR_ERR(conf);
431 + return ERR_PTR(err);
435 + * textsearch_destroy - destroy a search configuration
436 + * @conf: search configuration
438 + * Releases all references of the configuration and frees
441 +void textsearch_destroy(struct ts_config *conf)
444 + if (conf->ops->destroy)
445 + conf->ops->destroy(conf);
453 +++ b/net/ipv4/netfilter/textsearch/textsearch.h
455 +#ifndef __LINUX_TEXTSEARCH_H
456 +#define __LINUX_TEXTSEARCH_H
460 +#include <linux/types.h>
461 +#include <linux/list.h>
462 +#include <linux/kernel.h>
463 +#include <linux/module.h>
464 +#include <linux/slab.h>
470 + * TS_AUTOLOAD - Automatically load textsearch modules when needed
472 +#define TS_AUTOLOAD 1
475 + * struct ts_state - search state
476 + * @offset: offset for next match
477 + * @cb: control buffer, for persistent variables of get_next_block()
481 + unsigned int offset;
486 + * struct ts_ops - search module operations
487 + * @name: name of search algorithm
488 + * @init: initialization function to prepare a search
489 + * @find: find the next occurrence of the pattern
490 + * @destroy: destroy algorithm specific parts of a search configuration
491 + * @get_pattern: return head of pattern
492 + * @get_pattern_len: return length of pattern
493 + * @owner: module reference to algorithm
498 + struct ts_config * (*init)(const void *, unsigned int, gfp_t);
499 + unsigned int (*find)(struct ts_config *,
500 + struct ts_state *);
501 + void (*destroy)(struct ts_config *);
502 + void * (*get_pattern)(struct ts_config *);
503 + unsigned int (*get_pattern_len)(struct ts_config *);
504 + struct module *owner;
505 + struct list_head list;
509 + * struct ts_config - search configuration
510 + * @ops: operations of chosen algorithm
511 + * @get_next_block: callback to fetch the next block to search in
512 + * @finish: callback to finalize a search
516 + struct ts_ops *ops;
519 + * get_next_block - fetch next block of data
520 + * @consumed: number of bytes consumed by the caller
521 + * @dst: destination buffer
522 + * @conf: search configuration
523 + * @state: search state
525 + * Called repeatedly until 0 is returned. Must assign the
526 + * head of the next block of data to &*dst and return the length
527 + * of the block or 0 if at the end. consumed == 0 indicates
528 + * a new search. May store/read persistent values in state->cb.
530 + unsigned int (*get_next_block)(unsigned int consumed,
532 + struct ts_config *conf,
533 + struct ts_state *state);
536 + * finish - finalize/clean a series of get_next_block() calls
537 + * @conf: search configuration
538 + * @state: search state
540 + * Called after the last use of get_next_block(), may be used
541 + * to cleanup any leftovers.
543 + void (*finish)(struct ts_config *conf,
544 + struct ts_state *state);
548 + * textsearch_next - continue searching for a pattern
549 + * @conf: search configuration
550 + * @state: search state
552 + * Continues a search looking for more occurrences of the pattern.
553 + * textsearch_find() must be called to find the first occurrence
554 + * in order to reset the state.
556 + * Returns the position of the next occurrence of the pattern or
557 + * UINT_MAX if no match was found.
559 +static inline unsigned int textsearch_next(struct ts_config *conf,
560 + struct ts_state *state)
562 + unsigned int ret = conf->ops->find(conf, state);
565 + conf->finish(conf, state);
571 + * textsearch_find - start searching for a pattern
572 + * @conf: search configuration
573 + * @state: search state
575 + * Returns the position of first occurrence of the pattern or
576 + * UINT_MAX if no match was found.
578 +static inline unsigned int textsearch_find(struct ts_config *conf,
579 + struct ts_state *state)
582 + return textsearch_next(conf, state);
586 + * textsearch_get_pattern - return head of the pattern
587 + * @conf: search configuration
589 +static inline void *textsearch_get_pattern(struct ts_config *conf)
591 + return conf->ops->get_pattern(conf);
595 + * textsearch_get_pattern_len - return length of the pattern
596 + * @conf: search configuration
598 +static inline unsigned int textsearch_get_pattern_len(struct ts_config *conf)
600 + return conf->ops->get_pattern_len(conf);
603 +extern int textsearch_register(struct ts_ops *);
604 +extern int textsearch_unregister(struct ts_ops *);
605 +extern struct ts_config *textsearch_prepare(const char *, const void *,
606 + unsigned int, gfp_t, int);
607 +extern void textsearch_destroy(struct ts_config *conf);
608 +extern unsigned int textsearch_find_continuous(struct ts_config *,
610 + const void *, unsigned int);
613 +#define TS_PRIV_ALIGNTO 8
614 +#define TS_PRIV_ALIGN(len) (((len) + TS_PRIV_ALIGNTO-1) & ~(TS_PRIV_ALIGNTO-1))
616 +static inline struct ts_config *alloc_ts_config(size_t payload,
619 + struct ts_config *conf;
621 + conf = kmalloc(TS_PRIV_ALIGN(sizeof(*conf)) + payload, gfp_mask);
623 + return ERR_PTR(-ENOMEM);
625 + memset(conf, 0, TS_PRIV_ALIGN(sizeof(*conf)) + payload);
629 +static inline void *ts_config_priv(struct ts_config *conf)
631 + return ((u8 *) conf + TS_PRIV_ALIGN(sizeof(struct ts_config)));
634 +#endif /* __KERNEL__ */
638 +++ b/net/ipv4/netfilter/textsearch/ts_bm.c
641 + * lib/ts_bm.c Boyer-Moore text search implementation
643 + * This program is free software; you can redistribute it and/or
644 + * modify it under the terms of the GNU General Public License
645 + * as published by the Free Software Foundation; either version
646 + * 2 of the License, or (at your option) any later version.
648 + * Authors: Pablo Neira Ayuso <pablo@eurodev.net>
650 + * ==========================================================================
652 + * Implements Boyer-Moore string matching algorithm:
654 + * [1] A Fast String Searching Algorithm, R.S. Boyer and J.S. Moore.
655 + * Communications of the Association for Computing Machinery,
656 + * 20(10), 1977, pp. 762-772.
657 + * http://www.cs.utexas.edu/users/moore/publications/fstrpos.pdf
659 + * [2] Handbook of Exact String Matching Algorithms, Thierry Lecroq, 2004
660 + * http://www-igm.univ-mlv.fr/~lecroq/string/string.pdf
662 + * Note: Since Boyer-Moore (BM) performs searches for matchings from right
663 + * to left, it's still possible that a matching could be spread over
664 + * multiple blocks, in that case this algorithm won't find the match.
666 + * If you need to ensure that such a thing never happens, use the
667 + * Knuth-Morris-Pratt (KMP) implementation instead. In conclusion, choose
668 + * the proper string search algorithm depending on your setting.
670 + * Say you're using the textsearch infrastructure for filtering, NIDS or
671 + * any similar security focused purpose, then go KMP. Otherwise, if you
672 + * really care about performance, say you're classifying packets to apply
673 + * Quality of Service (QoS) policies, and you don't mind about possible
674 + * matchings spread over multiple fragments, then go BM.
677 +#include <linux/config.h>
678 +#include <linux/kernel.h>
679 +#include <linux/module.h>
680 +#include <linux/types.h>
681 +#include <linux/string.h>
682 +#include "textsearch.h"
684 +/* Alphabet size, use ASCII */
688 +#define DEBUGP printk
690 +#define DEBUGP(args, format...)
696 + unsigned int patlen;
697 + unsigned int bad_shift[ASIZE];
698 + unsigned int good_shift[0];
701 +static unsigned int bm_find(struct ts_config *conf, struct ts_state *state)
703 + struct ts_bm *bm = ts_config_priv(conf);
704 + unsigned int i, text_len, consumed = state->offset;
706 + int shift = bm->patlen - 1, bs; /* shift indexes the LAST byte of the comparison window; starting at patlen - 1 lets a match at offset 0 be found */
709 + text_len = conf->get_next_block(consumed, &text, conf, state);
711 + if (unlikely(text_len == 0))
714 + while (shift < text_len) {
715 + DEBUGP("Searching in position %d (%c)\n",
716 + shift, text[shift]);
717 + for (i = 0; i < bm->patlen; i++)
718 + if (text[shift-i] != bm->pattern[bm->patlen-1-i])
721 + /* London calling... */
722 + DEBUGP("found!\n");
723 + return consumed += (shift-(bm->patlen-1));
725 +next: bs = bm->bad_shift[text[shift-i]];
727 + /* Now jumping to... */
728 + shift = max_t(int, shift-i+bs, shift+bm->good_shift[i]);
730 + consumed += text_len;
736 +static int subpattern(u8 *pattern, int i, int j, int g)
738 + int x = i+g-1, y = j+g-1, ret = 0;
740 + while(pattern[x--] == pattern[y--]) {
746 + ret = pattern[i-1] != pattern[j-1];
754 +static void bm_compute_prefix_tbl(struct ts_bm *bm, const u8 *pattern,
759 + for (i = 0; i < ASIZE; i++)
760 + bm->bad_shift[i] = len;
761 + for (i = 0; i < len - 1; i++)
762 + bm->bad_shift[pattern[i]] = len - 1 - i;
764 + /* Compute the good shift array, used to match reoccurrences
765 + * of a subpattern */
766 + bm->good_shift[0] = 1;
767 + for (i = 1; i < bm->patlen; i++)
768 + bm->good_shift[i] = bm->patlen;
769 + for (i = bm->patlen-1, g = 1; i > 0; g++, i--) {
770 + for (j = i-1; j >= 1-g ; j--)
771 + if (subpattern(bm->pattern, i, j, g)) {
772 + bm->good_shift[g] = bm->patlen-j-g;
778 +static struct ts_config *bm_init(const void *pattern, unsigned int len,
781 + struct ts_config *conf;
783 + unsigned int prefix_tbl_len = len * sizeof(unsigned int);
784 + size_t priv_size = sizeof(*bm) + len + prefix_tbl_len;
786 + conf = alloc_ts_config(priv_size, gfp_mask);
790 + bm = ts_config_priv(conf);
792 + bm->pattern = (u8 *) bm->good_shift + prefix_tbl_len;
793 + memcpy(bm->pattern, pattern, len); /* copy first: the good-shift computation reads bm->pattern, which is still zero-filled until this copy */
794 + bm_compute_prefix_tbl(bm, pattern, len);
799 +static void *bm_get_pattern(struct ts_config *conf)
801 + struct ts_bm *bm = ts_config_priv(conf);
802 + return bm->pattern;
805 +static unsigned int bm_get_pattern_len(struct ts_config *conf)
807 + struct ts_bm *bm = ts_config_priv(conf);
811 +static struct ts_ops bm_ops = {
815 + .get_pattern = bm_get_pattern,
816 + .get_pattern_len = bm_get_pattern_len,
817 + .owner = THIS_MODULE,
818 + .list = LIST_HEAD_INIT(bm_ops.list)
821 +static int __init init_bm(void)
823 + return textsearch_register(&bm_ops);
826 +static void __exit exit_bm(void)
828 + textsearch_unregister(&bm_ops);
831 +++ b/net/ipv4/netfilter/textsearch/ts_kmp.c
834 + * lib/ts_kmp.c Knuth-Morris-Pratt text search implementation
836 + * This program is free software; you can redistribute it and/or
837 + * modify it under the terms of the GNU General Public License
838 + * as published by the Free Software Foundation; either version
839 + * 2 of the License, or (at your option) any later version.
841 + * Authors: Thomas Graf <tgraf@suug.ch>
843 + * ==========================================================================
845 + * Implements a linear-time string-matching algorithm due to Knuth,
846 + * Morris, and Pratt [1]. Their algorithm avoids the explicit
847 + * computation of the transition function DELTA altogether. Its
848 + * matching time is O(n), for n being length(text), using just an
849 + * auxiliary function PI[1..m], for m being length(pattern),
850 + * precomputed from the pattern in time O(m). The array PI allows
851 + * the transition function DELTA to be computed efficiently
852 + * "on the fly" as needed. Roughly speaking, for any state
853 + * "q" = 0,1,...,m and any character "a" in SIGMA, the value
854 + * PI["q"] contains the information that is independent of "a" and
855 + * is needed to compute DELTA("q", "a") [2]. Since the array PI
856 + * has only m entries, whereas DELTA has O(m|SIGMA|) entries, we
857 + * save a factor of |SIGMA| in the preprocessing time by computing
858 + * PI rather than DELTA.
860 + * [1] Cormen, Leiserson, Rivest, Stein
861 + * Introduction to Algorithms, 2nd Edition, MIT Press
862 + * [2] See finite automata theory
865 +#include <linux/config.h>
866 +#include <linux/module.h>
867 +#include <linux/types.h>
868 +#include <linux/string.h>
869 +#include "textsearch.h"
874 + unsigned int pattern_len;
875 + unsigned int prefix_tbl[0];
878 +static unsigned int kmp_find(struct ts_config *conf, struct ts_state *state)
880 + struct ts_kmp *kmp = ts_config_priv(conf);
881 + unsigned int i, q = 0, text_len, consumed = state->offset;
885 + text_len = conf->get_next_block(consumed, &text, conf, state);
887 + if (unlikely(text_len == 0))
890 + for (i = 0; i < text_len; i++) {
891 + while (q > 0 && kmp->pattern[q] != text[i])
892 + q = kmp->prefix_tbl[q - 1];
893 + if (kmp->pattern[q] == text[i])
895 + if (unlikely(q == kmp->pattern_len)) {
896 + state->offset = consumed + i + 1;
897 + return state->offset - kmp->pattern_len;
901 + consumed += text_len;
907 +static inline void kmp_compute_prefix_tbl(const u8 *pattern, unsigned int len,
908 + unsigned int *prefix_tbl)
912 + for (k = 0, q = 1; q < len; q++) {
913 + while (k > 0 && pattern[k] != pattern[q])
914 + k = prefix_tbl[k-1];
915 + if (pattern[k] == pattern[q])
921 +static struct ts_config *kmp_init(const void *pattern, unsigned int len,
924 + struct ts_config *conf;
925 + struct ts_kmp *kmp;
926 + unsigned int prefix_tbl_len = len * sizeof(unsigned int);
927 + size_t priv_size = sizeof(*kmp) + len + prefix_tbl_len;
929 + conf = alloc_ts_config(priv_size, gfp_mask);
933 + kmp = ts_config_priv(conf);
934 + kmp->pattern_len = len;
935 + kmp_compute_prefix_tbl(pattern, len, kmp->prefix_tbl);
936 + kmp->pattern = (u8 *) kmp->prefix_tbl + prefix_tbl_len;
937 + memcpy(kmp->pattern, pattern, len);
942 +static void *kmp_get_pattern(struct ts_config *conf)
944 + struct ts_kmp *kmp = ts_config_priv(conf);
945 + return kmp->pattern;
948 +static unsigned int kmp_get_pattern_len(struct ts_config *conf)
950 + struct ts_kmp *kmp = ts_config_priv(conf);
951 + return kmp->pattern_len;
954 +static struct ts_ops kmp_ops = {
958 + .get_pattern = kmp_get_pattern,
959 + .get_pattern_len = kmp_get_pattern_len,
960 + .owner = THIS_MODULE,
961 + .list = LIST_HEAD_INIT(kmp_ops.list)
964 +static int __init init_kmp(void)
966 + return textsearch_register(&kmp_ops);
969 +static void __exit exit_kmp(void)
971 + textsearch_unregister(&kmp_ops);