2 +++ b/drivers/net/imq.c
5 + * Pseudo-driver for the intermediate queue device.
7 + * This program is free software; you can redistribute it and/or
8 + * modify it under the terms of the GNU General Public License
9 + * as published by the Free Software Foundation; either version
10 + * 2 of the License, or (at your option) any later version.
12 + * Authors: Patrick McHardy, <kaber@trash.net>
14 + * The first version was written by Martin Devera, <devik@cdi.cz>
16 + * Credits: Jan Rafaj <imq2t@cedric.vabo.cz>
17 + * - Update patch to 2.4.21
18 + * Sebastian Strollo <sstrollo@nortelnetworks.com>
19 + * - Fix "Dead-loop on netdevice imq"-issue
20 + * Marcel Sebek <sebek64@post.cz>
21 + * - Update to 2.6.2-rc1
23 + * After some time of inactivity there is a group taking care
24 + * of IMQ again: http://www.linuximq.net
27 + * 2004/06/30 - New version of IMQ patch to kernels <=2.6.7
28 + * including the following changes:
30 + * - Correction of ipv6 support "+"s issue (Hasso Tepper)
31 + * - Correction of imq_init_devs() issue that resulted in
32 + * kernel OOPS unloading IMQ as module (Norbert Buchmuller)
33 + * - Addition of functionality to choose number of IMQ devices
34 + * during kernel config (Andre Correa)
35 + * - Addition of functionality to choose how IMQ hooks on
36 + * PRE and POSTROUTING (after or before NAT) (Andre Correa)
37 + * - Cosmetic corrections (Norbert Buchmuller) (Andre Correa)
40 + * 2005/12/16 - IMQ versions between 2.6.7 and 2.6.13 were
41 + * released with almost no problems. 2.6.14-x was released
42 + * with some important changes: nfcache was removed; After
43 + * some weeks of trouble we figured out that some IMQ fields
44 + * in skb were missing in skbuff.c - skb_clone and copy_skb_header.
45 + * These functions are correctly patched by this new patch version.
47 + * Thanks for all who helped to figure out all the problems with
48 + * 2.6.14.x: Patrick McHardy, Rune Kock, VeNoMouS, Max CtRiX,
49 + * Kevin Shanahan, Richard Lucassen, Valery Dachev (hopefully
50 + * I didn't forget anybody). I apologize again for my lack of time.
53 + * 2008/06/17 - 2.6.25 - Changed imq.c to use qdisc_run() instead
54 + * of qdisc_restart() and moved qdisc_run() to tasklet to avoid
55 + * recursive locking. New initialization routines to fix 'rmmod' not
56 + * working anymore. Used code from ifb.c. (Jussi Kivilinna)
58 + * 2008/08/06 - 2.6.26 - (JK)
59 + * - Replaced tasklet with 'netif_schedule()'.
60 + * - Cleaned up and added comments for imq_nf_queue().
63 + * - Add skb_save_cb/skb_restore_cb helper functions for backing up
64 + * control buffer. This is needed because qdisc-layer on kernels
65 + * 2.6.27 and newer overwrite control buffer. (Jussi Kivilinna)
66 + * - Add better locking for IMQ device. Hopefully this will solve
67 + * SMP issues. (Jussi Kivilinna)
70 + * 2009/04/20 - (Jussi Kivilinna)
71 + * - Fix rmmod not working
72 + * - Use netdevice feature flags to avoid extra packet handling
73 + * by core networking layer and possibly increase performance.
75 + * Also, many thanks to pablo Sebastian Greco for making the initial
76 + * patch and to those who helped the testing.
78 + * More info at: http://www.linuximq.net/ (Andre Correa)
81 +#include <linux/module.h>
82 +#include <linux/kernel.h>
83 +#include <linux/moduleparam.h>
84 +#include <linux/list.h>
85 +#include <linux/skbuff.h>
86 +#include <linux/netdevice.h>
87 +#include <linux/etherdevice.h>
88 +#include <linux/rtnetlink.h>
89 +#include <linux/if_arp.h>
90 +#include <linux/netfilter.h>
91 +#include <linux/netfilter_ipv4.h>
92 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
93 + #include <linux/netfilter_ipv6.h>
95 +#include <linux/imq.h>
96 +#include <net/pkt_sched.h>
97 +#include <net/netfilter/nf_queue.h>
99 +static nf_hookfn imq_nf_hook;
101 +static struct nf_hook_ops imq_ingress_ipv4 = {
102 + .hook = imq_nf_hook,
103 + .owner = THIS_MODULE,
105 + .hooknum = NF_INET_PRE_ROUTING,
106 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
107 + .priority = NF_IP_PRI_MANGLE + 1
109 + .priority = NF_IP_PRI_NAT_DST + 1
113 +static struct nf_hook_ops imq_egress_ipv4 = {
114 + .hook = imq_nf_hook,
115 + .owner = THIS_MODULE,
117 + .hooknum = NF_INET_POST_ROUTING,
118 +#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
119 + .priority = NF_IP_PRI_LAST
121 + .priority = NF_IP_PRI_NAT_SRC - 1
125 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
126 +static struct nf_hook_ops imq_ingress_ipv6 = {
127 + .hook = imq_nf_hook,
128 + .owner = THIS_MODULE,
130 + .hooknum = NF_INET_PRE_ROUTING,
131 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
132 + .priority = NF_IP6_PRI_MANGLE + 1
134 + .priority = NF_IP6_PRI_NAT_DST + 1
138 +static struct nf_hook_ops imq_egress_ipv6 = {
139 + .hook = imq_nf_hook,
140 + .owner = THIS_MODULE,
142 + .hooknum = NF_INET_POST_ROUTING,
143 +#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
144 + .priority = NF_IP6_PRI_LAST
146 + .priority = NF_IP6_PRI_NAT_SRC - 1
151 +#if defined(CONFIG_IMQ_NUM_DEVS)
152 +static unsigned int numdevs = CONFIG_IMQ_NUM_DEVS;
154 +static unsigned int numdevs = IMQ_MAX_DEVS;
157 +static DEFINE_SPINLOCK(imq_nf_queue_lock);
159 +static struct net_device *imq_devs_cache[IMQ_MAX_DEVS];
162 +static struct net_device_stats *imq_get_stats(struct net_device *dev)
164 + return &dev->stats;
167 +/* called for packets kfree'd in qdiscs at places other than enqueue */
168 +static void imq_skb_destructor(struct sk_buff *skb)
170 + struct nf_queue_entry *entry = skb->nf_queue_entry;
173 + nf_queue_entry_release_refs(entry);
177 + skb_restore_cb(skb); /* kfree backup */
180 +static void imq_nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
184 + if (!entry->next_outfn) {
185 + spin_lock_bh(&imq_nf_queue_lock);
186 + nf_reinject(entry, verdict);
187 + spin_unlock_bh(&imq_nf_queue_lock);
192 + local_bh_disable();
193 + status = entry->next_outfn(entry, entry->next_queuenum);
196 + nf_queue_entry_release_refs(entry);
197 + kfree_skb(entry->skb);
204 +static int imq_dev_xmit(struct sk_buff *skb, struct net_device *dev)
206 + dev->stats.tx_bytes += skb->len;
207 + dev->stats.tx_packets++;
209 + skb->imq_flags = 0;
210 + skb->destructor = NULL;
212 + skb_restore_cb(skb); /* restore skb->cb */
214 + dev->trans_start = jiffies;
215 + imq_nf_reinject(skb->nf_queue_entry, NF_ACCEPT);
219 +static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num)
221 + struct net_device *dev;
222 + struct sk_buff *skb_orig, *skb, *skb_shared;
224 + struct netdev_queue *txq;
226 + int retval = -EINVAL;
228 + index = entry->skb->imq_flags & IMQ_F_IFMASK;
229 + if (unlikely(index > numdevs - 1)) {
230 + if (net_ratelimit())
231 + printk(KERN_WARNING
232 + "IMQ: invalid device specified, highest is %u\n",
238 + /* check for imq device by index from cache */
239 + dev = imq_devs_cache[index];
240 + if (unlikely(!dev)) {
243 + /* get device by name and cache result */
244 + snprintf(buf, sizeof(buf), "imq%d", index);
245 + dev = dev_get_by_name(&init_net, buf);
253 + imq_devs_cache[index] = dev;
257 + if (unlikely(!(dev->flags & IFF_UP))) {
258 + entry->skb->imq_flags = 0;
259 + imq_nf_reinject(entry, NF_ACCEPT);
263 + dev->last_rx = jiffies;
268 + /* skb has owner? => make clone */
269 + if (unlikely(skb->destructor)) {
271 + skb = skb_clone(skb, GFP_ATOMIC);
279 + skb->nf_queue_entry = entry;
281 + dev->stats.rx_bytes += skb->len;
282 + dev->stats.rx_packets++;
284 + txq = dev_pick_tx(dev, skb);
286 + q = rcu_dereference(txq->qdisc);
287 + if (unlikely(!q->enqueue))
288 + goto packet_not_eaten_by_imq_dev;
290 + spin_lock_bh(qdisc_lock(q));
292 + users = atomic_read(&skb->users);
294 + skb_shared = skb_get(skb); /* increase reference count by one */
295 + skb_save_cb(skb_shared); /* backup skb->cb, as qdisc layer will
297 + qdisc_enqueue_root(skb_shared, q); /* might kfree_skb */
299 + if (likely(atomic_read(&skb_shared->users) == users + 1)) {
300 + kfree_skb(skb_shared); /* decrease reference count by one */
302 + skb->destructor = &imq_skb_destructor;
306 + kfree_skb(skb_orig); /* free original */
308 + spin_unlock_bh(qdisc_lock(q));
310 + /* schedule qdisc dequeue */
311 + __netif_schedule(q);
316 + skb_restore_cb(skb_shared); /* restore skb->cb */
317 + /* qdisc dropped packet and decreased skb reference count of
318 + * skb, so we don't really want to and try refree as that would
319 + * actually destroy the skb. */
320 + spin_unlock_bh(qdisc_lock(q));
321 + goto packet_not_eaten_by_imq_dev;
324 +packet_not_eaten_by_imq_dev:
325 + /* cloned? restore original */
328 + entry->skb = skb_orig;
335 +static struct nf_queue_handler nfqh = {
337 + .outfn = imq_nf_queue,
340 +static unsigned int imq_nf_hook(unsigned int hook, struct sk_buff *pskb,
341 + const struct net_device *indev,
342 + const struct net_device *outdev,
343 + int (*okfn)(struct sk_buff *))
345 + if (pskb->imq_flags & IMQ_F_ENQUEUE)
351 +static int imq_close(struct net_device *dev)
353 + netif_stop_queue(dev);
357 +static int imq_open(struct net_device *dev)
359 + netif_start_queue(dev);
363 +static void imq_setup(struct net_device *dev)
365 + dev->hard_start_xmit = imq_dev_xmit;
366 + dev->open = imq_open;
367 + dev->get_stats = imq_get_stats;
368 + dev->stop = imq_close;
369 + dev->type = ARPHRD_VOID;
371 + dev->tx_queue_len = 11000;
372 + dev->flags = IFF_NOARP;
373 + dev->features = NETIF_F_SG | NETIF_F_FRAGLIST |
374 + NETIF_F_GSO | NETIF_F_HW_CSUM |
378 +static int imq_validate(struct nlattr *tb[], struct nlattr *data[])
382 + if (tb[IFLA_ADDRESS]) {
383 + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
387 + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
388 + ret = -EADDRNOTAVAIL;
394 + printk(KERN_WARNING "IMQ: imq_validate failed (%d)\n", ret);
398 +static struct rtnl_link_ops imq_link_ops __read_mostly = {
401 + .setup = imq_setup,
402 + .validate = imq_validate,
405 +static int __init imq_init_hooks(void)
409 + nf_register_queue_imq_handler(&nfqh);
411 + err = nf_register_hook(&imq_ingress_ipv4);
415 + err = nf_register_hook(&imq_egress_ipv4);
419 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
420 + err = nf_register_hook(&imq_ingress_ipv6);
424 + err = nf_register_hook(&imq_egress_ipv6);
431 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
433 + nf_unregister_hook(&imq_ingress_ipv6);
435 + nf_unregister_hook(&imq_egress_ipv4);
438 + nf_unregister_hook(&imq_ingress_ipv4);
440 + nf_unregister_queue_imq_handler();
444 +static int __init imq_init_one(int index)
446 + struct net_device *dev;
449 + dev = alloc_netdev(0, "imq%d", imq_setup);
453 + ret = dev_alloc_name(dev, dev->name);
457 + dev->rtnl_link_ops = &imq_link_ops;
458 + ret = register_netdevice(dev);
468 +static int __init imq_init_devs(void)
472 + if (numdevs < 1 || numdevs > IMQ_MAX_DEVS) {
473 + printk(KERN_ERR "IMQ: numdevs has to be betweed 1 and %u\n",
479 + err = __rtnl_link_register(&imq_link_ops);
481 + for (i = 0; i < numdevs && !err; i++)
482 + err = imq_init_one(i);
485 + __rtnl_link_unregister(&imq_link_ops);
486 + memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
493 +static int __init imq_init_module(void)
497 +#if defined(CONFIG_IMQ_NUM_DEVS)
498 + BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS > 16);
499 + BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS < 2);
500 + BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS - 1 > IMQ_F_IFMASK);
503 + err = imq_init_devs();
505 + printk(KERN_ERR "IMQ: Error trying imq_init_devs(net)\n");
509 + err = imq_init_hooks();
511 + printk(KERN_ERR "IMQ: Error trying imq_init_hooks()\n");
512 + rtnl_link_unregister(&imq_link_ops);
513 + memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
517 + printk(KERN_INFO "IMQ driver loaded successfully.\n");
519 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
520 + printk(KERN_INFO "\tHooking IMQ before NAT on PREROUTING.\n");
522 + printk(KERN_INFO "\tHooking IMQ after NAT on PREROUTING.\n");
524 +#if defined(CONFIG_IMQ_BEHAVIOR_AB) || defined(CONFIG_IMQ_BEHAVIOR_BB)
525 + printk(KERN_INFO "\tHooking IMQ before NAT on POSTROUTING.\n");
527 + printk(KERN_INFO "\tHooking IMQ after NAT on POSTROUTING.\n");
533 +static void __exit imq_unhook(void)
535 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
536 + nf_unregister_hook(&imq_ingress_ipv6);
537 + nf_unregister_hook(&imq_egress_ipv6);
539 + nf_unregister_hook(&imq_ingress_ipv4);
540 + nf_unregister_hook(&imq_egress_ipv4);
542 + nf_unregister_queue_imq_handler();
545 +static void __exit imq_cleanup_devs(void)
547 + rtnl_link_unregister(&imq_link_ops);
548 + memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
551 +static void __exit imq_exit_module(void)
554 + imq_cleanup_devs();
555 + printk(KERN_INFO "IMQ driver unloaded successfully.\n");
558 +module_init(imq_init_module);
559 +module_exit(imq_exit_module);
561 +module_param(numdevs, int, 0);
562 +MODULE_PARM_DESC(numdevs, "number of IMQ devices (how many imq* devices will "
564 +MODULE_AUTHOR("http://www.linuximq.net");
565 +MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See "
566 + "http://www.linuximq.net/ for more information.");
567 +MODULE_LICENSE("GPL");
568 +MODULE_ALIAS_RTNL_LINK("imq");
570 --- a/drivers/net/Kconfig
571 +++ b/drivers/net/Kconfig
572 @@ -109,6 +109,129 @@ config EQUALIZER
573 To compile this driver as a module, choose M here: the module
574 will be called eql. If unsure, say N.
577 + tristate "IMQ (intermediate queueing device) support"
578 + depends on NETDEVICES && NETFILTER
580 + The IMQ device(s) is used as placeholder for QoS queueing
581 + disciplines. Every packet entering/leaving the IP stack can be
582 + directed through the IMQ device where it's enqueued/dequeued to the
583 + attached qdisc. This allows you to treat network devices as classes
584 + and distribute bandwidth among them. Iptables is used to specify
585 + through which IMQ device, if any, packets travel.
587 + More information at: http://www.linuximq.net/
589 + To compile this driver as a module, choose M here: the module
590 + will be called imq. If unsure, say N.
593 + prompt "IMQ behavior (PRE/POSTROUTING)"
595 + default IMQ_BEHAVIOR_AB
598 + This setting defines how IMQ behaves with respect to its
599 + hooking in PREROUTING and POSTROUTING.
601 + IMQ can work in any of the following ways:
603 + PREROUTING | POSTROUTING
604 + -----------------|-------------------
605 + #1 After NAT | After NAT
606 + #2 After NAT | Before NAT
607 + #3 Before NAT | After NAT
608 + #4 Before NAT | Before NAT
610 + The default behavior is to hook before NAT on PREROUTING
611 + and after NAT on POSTROUTING (#3).
613 + These settings are especially useful when trying to use IMQ
614 + to shape NATed clients.
616 + More information can be found at: www.linuximq.net
618 + If not sure leave the default settings alone.
620 +config IMQ_BEHAVIOR_AA
623 + This setting defines how IMQ behaves with respect to its
624 + hooking in PREROUTING and POSTROUTING.
626 + Choosing this option will make IMQ hook like this:
628 + PREROUTING: After NAT
629 + POSTROUTING: After NAT
631 + More information can be found at: www.linuximq.net
633 + If not sure leave the default settings alone.
635 +config IMQ_BEHAVIOR_AB
638 + This setting defines how IMQ behaves with respect to its
639 + hooking in PREROUTING and POSTROUTING.
641 + Choosing this option will make IMQ hook like this:
643 + PREROUTING: After NAT
644 + POSTROUTING: Before NAT
646 + More information can be found at: www.linuximq.net
648 + If not sure leave the default settings alone.
650 +config IMQ_BEHAVIOR_BA
653 + This setting defines how IMQ behaves with respect to its
654 + hooking in PREROUTING and POSTROUTING.
656 + Choosing this option will make IMQ hook like this:
658 + PREROUTING: Before NAT
659 + POSTROUTING: After NAT
661 + More information can be found at: www.linuximq.net
663 + If not sure leave the default settings alone.
665 +config IMQ_BEHAVIOR_BB
668 + This setting defines how IMQ behaves with respect to its
669 + hooking in PREROUTING and POSTROUTING.
671 + Choosing this option will make IMQ hook like this:
673 + PREROUTING: Before NAT
674 + POSTROUTING: Before NAT
676 + More information can be found at: www.linuximq.net
678 + If not sure leave the default settings alone.
684 + int "Number of IMQ devices"
690 + This setting defines how many IMQ devices will be
693 + The default value is 16.
695 + More information can be found at: www.linuximq.net
697 + If not sure leave the default settings alone.
700 tristate "Universal TUN/TAP device driver support"
702 --- a/drivers/net/Makefile
703 +++ b/drivers/net/Makefile
704 @@ -144,6 +144,7 @@ obj-$(CONFIG_SLHC) += slhc.o
705 obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o
707 obj-$(CONFIG_DUMMY) += dummy.o
708 +obj-$(CONFIG_IMQ) += imq.o
709 obj-$(CONFIG_IFB) += ifb.o
710 obj-$(CONFIG_MACVLAN) += macvlan.o
711 obj-$(CONFIG_DE600) += de600.o
713 +++ b/include/linux/imq.h
718 +/* IFMASK (16 device indexes, 0 to 15) and flag(s) fit in 5 bits */
719 +#define IMQ_F_BITS 5
721 +#define IMQ_F_IFMASK 0x0f
722 +#define IMQ_F_ENQUEUE 0x10
724 +#define IMQ_MAX_DEVS (IMQ_F_IFMASK + 1)
729 +++ b/include/linux/netfilter_ipv4/ipt_IMQ.h
734 +/* Backwards compatibility for old userspace */
735 +#include <linux/netfilter/xt_IMQ.h>
737 +#define ipt_imq_info xt_imq_info
739 +#endif /* _IPT_IMQ_H */
742 +++ b/include/linux/netfilter_ipv6/ip6t_IMQ.h
747 +/* Backwards compatibility for old userspace */
748 +#include <linux/netfilter/xt_IMQ.h>
750 +#define ip6t_imq_info xt_imq_info
752 +#endif /* _IP6T_IMQ_H */
754 --- a/include/linux/skbuff.h
755 +++ b/include/linux/skbuff.h
757 #include <linux/rcupdate.h>
758 #include <linux/dmaengine.h>
759 #include <linux/hrtimer.h>
760 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
761 +#include <linux/imq.h>
764 #define HAVE_ALLOC_SKB /* For the drivers to know */
765 #define HAVE_ALIGNABLE_SKB /* Ditto 8) */
766 @@ -272,6 +275,9 @@ struct sk_buff {
767 * first. This is owned by whoever has the skb queued ATM.
770 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
776 @@ -302,6 +308,9 @@ struct sk_buff {
777 struct nf_conntrack *nfct;
778 struct sk_buff *nfct_reasm;
780 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
781 + struct nf_queue_entry *nf_queue_entry;
783 #ifdef CONFIG_BRIDGE_NETFILTER
784 struct nf_bridge_info *nf_bridge;
786 @@ -321,6 +330,9 @@ struct sk_buff {
787 __u8 do_not_encrypt:1;
789 /* 0/13/14 bit hole */
790 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
791 + __u8 imq_flags:IMQ_F_BITS;
794 #ifdef CONFIG_NET_DMA
795 dma_cookie_t dma_cookie;
796 @@ -353,6 +365,12 @@ struct sk_buff {
798 #include <asm/system.h>
801 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
802 +extern int skb_save_cb(struct sk_buff *skb);
803 +extern int skb_restore_cb(struct sk_buff *skb);
806 extern void kfree_skb(struct sk_buff *skb);
807 extern void __kfree_skb(struct sk_buff *skb);
808 extern struct sk_buff *__alloc_skb(unsigned int size,
809 @@ -1633,6 +1651,10 @@ static inline void __nf_copy(struct sk_b
810 dst->nfct_reasm = src->nfct_reasm;
811 nf_conntrack_get_reasm(src->nfct_reasm);
813 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
814 + dst->imq_flags = src->imq_flags;
815 + dst->nf_queue_entry = src->nf_queue_entry;
817 #ifdef CONFIG_BRIDGE_NETFILTER
818 dst->nf_bridge = src->nf_bridge;
819 nf_bridge_get(src->nf_bridge);
823 #include <net/net_namespace.h>
824 #include <net/sock.h>
825 #include <linux/rtnetlink.h>
826 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
827 +#include <linux/imq.h>
829 #include <linux/proc_fs.h>
830 #include <linux/seq_file.h>
831 #include <linux/stat.h>
832 @@ -1624,7 +1627,11 @@ int dev_hard_start_xmit(struct sk_buff *
833 struct netdev_queue *txq)
835 if (likely(!skb->next)) {
836 - if (!list_empty(&ptype_all))
837 + if (!list_empty(&ptype_all)
838 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
839 + && !(skb->imq_flags & IMQ_F_ENQUEUE)
842 dev_queue_xmit_nit(skb, dev);
844 if (netif_needs_gso(dev, skb)) {
845 @@ -1715,8 +1722,7 @@ static u16 simple_tx_hash(struct net_dev
846 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
849 -static struct netdev_queue *dev_pick_tx(struct net_device *dev,
850 - struct sk_buff *skb)
851 +struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb)
855 @@ -1728,6 +1734,7 @@ static struct netdev_queue *dev_pick_tx(
856 skb_set_queue_mapping(skb, queue_index);
857 return netdev_get_tx_queue(dev, queue_index);
859 +EXPORT_SYMBOL(dev_pick_tx);
862 * dev_queue_xmit - transmit a buffer
863 --- a/include/linux/netdevice.h
864 +++ b/include/linux/netdevice.h
865 @@ -915,6 +915,7 @@ extern int dev_alloc_name(struct net_de
866 extern int dev_open(struct net_device *dev);
867 extern int dev_close(struct net_device *dev);
868 extern void dev_disable_lro(struct net_device *dev);
869 +extern struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb);
870 extern int dev_queue_xmit(struct sk_buff *skb);
871 extern int register_netdevice(struct net_device *dev);
872 extern void unregister_netdevice(struct net_device *dev);
874 +++ b/include/linux/netfilter/xt_IMQ.h
879 +struct xt_imq_info {
880 + unsigned int todev; /* target imq device */
883 +#endif /* _XT_IMQ_H */
885 --- a/include/net/netfilter/nf_queue.h
886 +++ b/include/net/netfilter/nf_queue.h
887 @@ -13,6 +13,12 @@ struct nf_queue_entry {
888 struct net_device *indev;
889 struct net_device *outdev;
890 int (*okfn)(struct sk_buff *);
892 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
893 + int (*next_outfn)(struct nf_queue_entry *entry,
894 + unsigned int queuenum);
895 + unsigned int next_queuenum;
899 #define nf_queue_entry_reroute(x) ((void *)x + sizeof(struct nf_queue_entry))
900 @@ -30,5 +36,11 @@ extern int nf_unregister_queue_handler(i
901 const struct nf_queue_handler *qh);
902 extern void nf_unregister_queue_handlers(const struct nf_queue_handler *qh);
903 extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
904 +extern void nf_queue_entry_release_refs(struct nf_queue_entry *entry);
906 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
907 +extern void nf_register_queue_imq_handler(const struct nf_queue_handler *qh);
908 +extern void nf_unregister_queue_imq_handler(void);
911 #endif /* _NF_QUEUE_H */
912 --- a/net/core/skbuff.c
913 +++ b/net/core/skbuff.c
916 static struct kmem_cache *skbuff_head_cache __read_mostly;
917 static struct kmem_cache *skbuff_fclone_cache __read_mostly;
918 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
919 +static struct kmem_cache *skbuff_cb_store_cache __read_mostly;
922 static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
923 struct pipe_buffer *buf)
924 @@ -88,6 +91,80 @@ static int sock_pipe_buf_steal(struct pi
928 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
929 +/* Control buffer save/restore for IMQ devices */
930 +struct skb_cb_table {
936 +static DEFINE_SPINLOCK(skb_cb_store_lock);
938 +int skb_save_cb(struct sk_buff *skb)
940 + struct skb_cb_table *next;
942 + next = kmem_cache_alloc(skbuff_cb_store_cache, GFP_ATOMIC);
946 + BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb));
948 + memcpy(next->cb, skb->cb, sizeof(skb->cb));
949 + next->cb_next = skb->cb_next;
951 + atomic_set(&next->refcnt, 1);
953 + skb->cb_next = next;
956 +EXPORT_SYMBOL(skb_save_cb);
958 +int skb_restore_cb(struct sk_buff *skb)
960 + struct skb_cb_table *next;
965 + next = skb->cb_next;
967 + BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb));
969 + memcpy(skb->cb, next->cb, sizeof(skb->cb));
970 + skb->cb_next = next->cb_next;
972 + spin_lock(&skb_cb_store_lock);
974 + if (atomic_dec_and_test(&next->refcnt)) {
975 + kmem_cache_free(skbuff_cb_store_cache, next);
978 + spin_unlock(&skb_cb_store_lock);
982 +EXPORT_SYMBOL(skb_restore_cb);
984 +static void skb_copy_stored_cb(struct sk_buff *new, struct sk_buff *old)
986 + struct skb_cb_table *next;
988 + if (!old->cb_next) {
993 + spin_lock(&skb_cb_store_lock);
995 + next = old->cb_next;
996 + atomic_inc(&next->refcnt);
997 + new->cb_next = next;
999 + spin_unlock(&skb_cb_store_lock);
1003 /* Pipe buffer operations for a socket. */
1004 static struct pipe_buf_operations sock_pipe_buf_ops = {
1005 @@ -362,6 +439,15 @@ static void skb_release_all(struct sk_bu
1007 skb->destructor(skb);
1009 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1010 + /* This should not happen. When it does, avoid memleak by restoring
1011 + the chain of cb-backups. */
1012 + while(skb->cb_next != NULL) {
1013 + printk(KERN_WARNING "kfree_skb: skb->cb_next: %08x\n",
1015 + skb_restore_cb(skb);
1018 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
1019 nf_conntrack_put(skb->nfct);
1020 nf_conntrack_put_reasm(skb->nfct_reasm);
1021 @@ -424,6 +510,9 @@ static void __copy_skb_header(struct sk_
1022 new->sp = secpath_get(old->sp);
1024 memcpy(new->cb, old->cb, sizeof(old->cb));
1025 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1026 + skb_copy_stored_cb(new, old);
1028 new->csum_start = old->csum_start;
1029 new->csum_offset = old->csum_offset;
1030 new->local_df = old->local_df;
1031 @@ -2326,6 +2415,13 @@ void __init skb_init(void)
1033 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1035 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1036 + skbuff_cb_store_cache = kmem_cache_create("skbuff_cb_store_cache",
1037 + sizeof(struct skb_cb_table),
1039 + SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1045 --- a/net/netfilter/Kconfig
1046 +++ b/net/netfilter/Kconfig
1047 @@ -342,6 +342,18 @@ config NETFILTER_XT_TARGET_DSCP
1049 To compile it as a module, choose M here. If unsure, say N.
1051 +config NETFILTER_XT_TARGET_IMQ
1052 + tristate '"IMQ" target support'
1053 + depends on NETFILTER_XTABLES
1054 + depends on IP_NF_MANGLE || IP6_NF_MANGLE
1056 + default m if NETFILTER_ADVANCED=n
1058 + This option adds a `IMQ' target which is used to specify if and
1059 + to which imq device packets should get enqueued/dequeued.
1061 + To compile it as a module, choose M here. If unsure, say N.
1063 config NETFILTER_XT_TARGET_MARK
1064 tristate '"MARK" target support'
1065 depends on NETFILTER_XTABLES
1066 --- a/net/netfilter/Makefile
1067 +++ b/net/netfilter/Makefile
1068 @@ -42,6 +42,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIF
1069 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
1070 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
1071 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
1072 +obj-$(CONFIG_NETFILTER_XT_TARGET_IMQ) += xt_IMQ.o
1073 obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o
1074 obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o
1075 obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
1076 --- a/net/netfilter/nf_queue.c
1077 +++ b/net/netfilter/nf_queue.c
1078 @@ -20,6 +20,26 @@ static const struct nf_queue_handler *qu
1080 static DEFINE_MUTEX(queue_handler_mutex);
1082 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1083 +static const struct nf_queue_handler *queue_imq_handler;
1085 +void nf_register_queue_imq_handler(const struct nf_queue_handler *qh)
1087 + mutex_lock(&queue_handler_mutex);
1088 + rcu_assign_pointer(queue_imq_handler, qh);
1089 + mutex_unlock(&queue_handler_mutex);
1091 +EXPORT_SYMBOL(nf_register_queue_imq_handler);
1093 +void nf_unregister_queue_imq_handler(void)
1095 + mutex_lock(&queue_handler_mutex);
1096 + rcu_assign_pointer(queue_imq_handler, NULL);
1097 + mutex_unlock(&queue_handler_mutex);
1099 +EXPORT_SYMBOL(nf_unregister_queue_imq_handler);
1102 /* return EBUSY when somebody else is registered, return EEXIST if the
1103 * same handler is registered, return 0 in case of success. */
1104 int nf_register_queue_handler(int pf, const struct nf_queue_handler *qh)
1105 @@ -80,7 +100,7 @@ void nf_unregister_queue_handlers(const
1107 EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
1109 -static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
1110 +void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
1112 /* Release those devices we held, or Alexey will kill me. */
1114 @@ -100,6 +120,7 @@ static void nf_queue_entry_release_refs(
1115 /* Drop reference to owner of hook which queued us. */
1116 module_put(entry->elem->owner);
1118 +EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
1121 * Any packet that leaves via this function must come back
1122 @@ -121,12 +142,26 @@ static int __nf_queue(struct sk_buff *sk
1124 const struct nf_afinfo *afinfo;
1125 const struct nf_queue_handler *qh;
1126 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1127 + const struct nf_queue_handler *qih = NULL;
1130 /* QUEUE == DROP if noone is waiting, to be safe. */
1133 qh = rcu_dereference(queue_handler[pf]);
1134 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1135 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1136 + if (pf == PF_INET || pf == PF_INET6)
1138 + if (pf == PF_INET)
1140 + qih = rcu_dereference(queue_imq_handler);
1148 afinfo = nf_get_afinfo(pf);
1149 @@ -145,6 +180,10 @@ static int __nf_queue(struct sk_buff *sk
1153 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1154 + .next_outfn = qh ? qh->outfn : NULL,
1155 + .next_queuenum = queuenum,
1159 /* If it's going away, ignore hook. */
1160 @@ -170,8 +209,19 @@ static int __nf_queue(struct sk_buff *sk
1163 afinfo->saveroute(skb, entry);
1165 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1167 + status = qih->outfn(entry, queuenum);
1168 + goto imq_skip_queue;
1172 status = qh->outfn(entry, queuenum);
1174 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1181 +++ b/net/netfilter/xt_IMQ.c
1184 + * This target marks packets to be enqueued to an imq device
1186 +#include <linux/module.h>
1187 +#include <linux/skbuff.h>
1188 +#include <linux/netfilter/x_tables.h>
1189 +#include <linux/netfilter/xt_IMQ.h>
1190 +#include <linux/imq.h>
1192 +static unsigned int imq_target(struct sk_buff *pskb,
1193 + const struct net_device *in,
1194 + const struct net_device *out,
1195 + unsigned int hooknum,
1196 + const struct xt_target *target,
1197 + const void *targinfo)
1199 + const struct xt_imq_info *mr = targinfo;
1201 + pskb->imq_flags = (mr->todev & IMQ_F_IFMASK) | IMQ_F_ENQUEUE;
1203 + return XT_CONTINUE;
1206 +static bool imq_checkentry(const char *tablename,
1207 + const void *entry,
1208 + const struct xt_target *target,
1210 + unsigned int hook_mask)
1212 + struct xt_imq_info *mr = targinfo;
1214 + if (mr->todev > IMQ_MAX_DEVS - 1) {
1215 + printk(KERN_WARNING
1216 + "IMQ: invalid device specified, highest is %u\n",
1217 + IMQ_MAX_DEVS - 1);
1224 +static struct xt_target xt_imq_reg[] __read_mostly = {
1227 + .family = AF_INET,
1228 + .target = imq_target,
1229 + .targetsize = sizeof(struct xt_imq_info),
1230 + .table = "mangle",
1231 + .checkentry = imq_checkentry,
1236 + .family = AF_INET6,
1237 + .target = imq_target,
1238 + .targetsize = sizeof(struct xt_imq_info),
1239 + .table = "mangle",
1240 + .checkentry = imq_checkentry,
1245 +static int __init imq_init(void)
1247 + return xt_register_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg));
1250 +static void __exit imq_fini(void)
1252 + xt_unregister_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg));
1255 +module_init(imq_init);
1256 +module_exit(imq_fini);
1258 +MODULE_AUTHOR("http://www.linuximq.net");
1259 +MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See http://www.linuximq.net/ for more information.");
1260 +MODULE_LICENSE("GPL");
1261 +MODULE_ALIAS("ipt_IMQ");
1262 +MODULE_ALIAS("ip6t_IMQ");