-diff -Naur linux-2.6.15.1.orig/include/linux/pkt_sched.h linux-2.6.15.1/include/linux/pkt_sched.h
---- linux-2.6.15.1.orig/include/linux/pkt_sched.h 2006-01-14 22:16:02.000000000 -0800
-+++ linux-2.6.15.1/include/linux/pkt_sched.h 2006-01-30 16:02:32.000000000 -0800
-@@ -146,8 +146,35 @@
+Index: linux-2.6.21.7/include/linux/pkt_sched.h
+===================================================================
+--- linux-2.6.21.7.orig/include/linux/pkt_sched.h
++++ linux-2.6.21.7/include/linux/pkt_sched.h
+@@ -146,8 +146,40 @@ struct tc_sfq_qopt
*
* The only reason for this is efficiency, it is possible
* to change these parameters in compile time.
+ TCA_SFQ_HASH_DSTDIR,
+ TCA_SFQ_HASH_SRCDIR,
+ TCA_SFQ_HASH_FWMARKDIR,
++ /* conntrack */
++ TCA_SFQ_HASH_CTORIGDST,
++ TCA_SFQ_HASH_CTORIGSRC,
++ TCA_SFQ_HASH_CTREPLDST,
++ TCA_SFQ_HASH_CTREPLSRC,
+};
+
+struct tc_esfq_qopt
/* RED section */
enum
-diff -Naur linux-2.6.15.1.orig/net/sched/Kconfig linux-2.6.15.1/net/sched/Kconfig
---- linux-2.6.15.1.orig/net/sched/Kconfig 2006-01-14 22:16:02.000000000 -0800
-+++ linux-2.6.15.1/net/sched/Kconfig 2006-01-30 16:02:32.000000000 -0800
-@@ -185,6 +185,28 @@
+Index: linux-2.6.21.7/net/sched/Kconfig
+===================================================================
+--- linux-2.6.21.7.orig/net/sched/Kconfig
++++ linux-2.6.21.7/net/sched/Kconfig
+@@ -189,6 +189,26 @@ config NET_SCH_SFQ
To compile this code as a module, choose M here: the
module will be called sch_sfq.
+config NET_SCH_ESFQ
-+ tristate "ESFQ queue"
-+ depends on NET_SCHED
++ tristate "Enhanced Stochastic Fairness Queueing (ESFQ)"
+ ---help---
+ Say Y here if you want to use the Enhanced Stochastic Fairness
+ Queueing (ESFQ) packet scheduling algorithm for some of your network
+ devices or as a leaf discipline for a classful qdisc such as HTB or
+ CBQ (see the top of <file:net/sched/sch_esfq.c> for details and
+ references to the SFQ algorithm).
-+
++
+ This is an enhanced SFQ version which allows you to control some
-+ hardcoded values in the SFQ scheduler: queue depth, hash table size,
-+ and queues limit.
-+
-+ ESFQ also adds control to the hash function used to identify packet
-+ flows. The original SFQ hashes by individual flow (TCP session or UDP
-+ stream); ESFQ can hash by src or dst IP as well, which can be more
-+ fair to users in some networking situations.
++ hardcoded values in the SFQ scheduler.
++
++ ESFQ also adds control of the hash function used to identify packet
++ flows. The original SFQ discipline hashes by connection; ESFQ adds
++ several other hashing methods, such as by src IP or by dst IP, which
++ can be more fair to users in some networking situations.
+
+ To compile this code as a module, choose M here: the
+ module will be called sch_esfq.
config NET_SCH_TEQL
tristate "True Link Equalizer (TEQL)"
---help---
-diff -Naur linux-2.6.15.1.orig/net/sched/Makefile linux-2.6.15.1/net/sched/Makefile
---- linux-2.6.15.1.orig/net/sched/Makefile 2006-01-14 22:16:02.000000000 -0800
-+++ linux-2.6.15.1/net/sched/Makefile 2006-01-30 16:02:32.000000000 -0800
-@@ -23,6 +23,7 @@
+Index: linux-2.6.21.7/net/sched/Makefile
+===================================================================
+--- linux-2.6.21.7.orig/net/sched/Makefile
++++ linux-2.6.21.7/net/sched/Makefile
+@@ -23,6 +23,7 @@ obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o
obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o
obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o
obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o
obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o
obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o
obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o
-diff -Naur linux-2.6.15.1.orig/net/sched/sch_esfq.c linux-2.6.15.1/net/sched/sch_esfq.c
---- linux-2.6.15.1.orig/net/sched/sch_esfq.c 1969-12-31 16:00:00.000000000 -0800
-+++ linux-2.6.15.1/net/sched/sch_esfq.c 2006-01-30 16:12:29.000000000 -0800
-@@ -0,0 +1,644 @@
+Index: linux-2.6.21.7/net/sched/sch_esfq.c
+===================================================================
+--- /dev/null
++++ linux-2.6.21.7/net/sched/sch_esfq.c
+@@ -0,0 +1,704 @@
+/*
+ * net/sched/sch_esfq.c Extended Stochastic Fairness Queueing discipline.
+ *
+ *
+ * Corey Hickey, <bugfood-c@fatooh.org>
+ * Maintenance of the Linux 2.6 port.
-+ * Added fwmark hash (thanks to Robert Kurjata)
++ * Added fwmark hash (thanks to Robert Kurjata).
+ * Added direct hashing for src, dst, and fwmark.
++ * Added usage of jhash.
+ *
+ */
+
-+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
++#include <linux/jhash.h>
+
++#ifdef CONFIG_NF_CONNTRACK_ENABLED
++#include <net/netfilter/nf_conntrack.h>
++#endif
+
+/* Stochastic Fairness Queuing algorithm.
+ For more comments look at sch_sfq.c.
+ The difference is that you can change limit, depth,
-+ hash table size and choose 7 hash types.
++ hash table size and choose alternate hash types.
+
+ classic: same as in sch_sfq.c
+ dst: destination IP address
+ dst_direct:
+ src_direct:
+ fwmark_direct: direct hashing of the above sources
++ ctorigdst: original destination IP address
++ ctorigsrc: original source IP address
++ ctrepldst: reply destination IP address
++ ctreplsrc: reply source IP address
+
-+ TODO:
-+ make sfq_change work.
+*/
+
+
+ unsigned dyn_range; /* saved range */
+};
+
-+static __inline__ unsigned esfq_hash_u32(struct esfq_sched_data *q,u32 h)
++/* This contains the info we will hash. */
++struct esfq_packet_info
+{
-+ int pert = q->perturbation;
-+
-+ if (pert)
-+ h = (h<<pert) ^ (h>>(0x1F - pert));
-+
-+ h = ntohl(h) * 2654435761UL;
-+ return h & (q->hash_divisor-1);
-+}
++ u32 proto; /* protocol or port */
++ u32 src; /* source from packet header */
++ u32 dst; /* destination from packet header */
++ u32 ctorigsrc; /* original source from conntrack */
++ u32 ctorigdst; /* original destination from conntrack */
++ u32 ctreplsrc; /* reply source from conntrack */
++ u32 ctrepldst; /* reply destination from conntrack */
++ u32 mark; /* netfilter mark (fwmark) */
++};
+
+/* Hash input values directly into the "nearest" slot, taking into account the
+ * range of input values seen. This is most useful when the hash table is at
-+ * least as large as the range of possible values. */
++ * least as large as the range of possible values.
++ * Note: this functionality was added before the change to using jhash, and may
++ * no longer be useful. */
+static __inline__ unsigned esfq_hash_direct(struct esfq_sched_data *q, u32 h)
+{
+ /* adjust minimum and maximum */
+ return (h - q->dyn_min) * (q->hash_divisor - 1) / q->dyn_range;
+}
+
-+static __inline__ unsigned esfq_fold_hash_classic(struct esfq_sched_data *q, u32 h, u32 h1)
++static __inline__ unsigned esfq_jhash_1word(struct esfq_sched_data *q,u32 a)
+{
-+ int pert = q->perturbation;
++ return jhash_1word(a, q->perturbation) & (q->hash_divisor-1);
++}
+
-+ /* Have we any rotation primitives? If not, WHY? */
-+ h ^= (h1<<pert) ^ (h1>>(0x1F - pert));
-+ h ^= h>>10;
-+ return h & (q->hash_divisor-1);
++static __inline__ unsigned esfq_jhash_2words(struct esfq_sched_data *q, u32 a, u32 b)
++{
++ return jhash_2words(a, b, q->perturbation) & (q->hash_divisor-1);
+}
+
-+static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb)
++static __inline__ unsigned esfq_jhash_3words(struct esfq_sched_data *q, u32 a, u32 b, u32 c)
+{
-+ u32 h, h2;
-+ u32 hs;
-+ u32 nfm;
++ return jhash_3words(a, b, c, q->perturbation) & (q->hash_divisor-1);
++}
+
++
++static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb)
++{
++ struct esfq_packet_info info;
++#ifdef CONFIG_NF_CONNTRACK_ENABLED
++ enum ip_conntrack_info ctinfo;
++ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
++#endif
++
+ switch (skb->protocol) {
+ case __constant_htons(ETH_P_IP):
+ {
+ struct iphdr *iph = skb->nh.iph;
-+ h = iph->daddr;
-+ hs = iph->saddr;
-+ nfm = skb->nfmark;
-+ h2 = hs^iph->protocol;
++ info.dst = iph->daddr;
++ info.src = iph->saddr;
+ if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
+ (iph->protocol == IPPROTO_TCP ||
+ iph->protocol == IPPROTO_UDP ||
+ iph->protocol == IPPROTO_SCTP ||
+ iph->protocol == IPPROTO_DCCP ||
+ iph->protocol == IPPROTO_ESP))
-+ h2 ^= *(((u32*)iph) + iph->ihl);
++ info.proto = *(((u32*)iph) + iph->ihl);
++ else
++ info.proto = iph->protocol;
+ break;
+ }
+ case __constant_htons(ETH_P_IPV6):
+ {
+ struct ipv6hdr *iph = skb->nh.ipv6h;
-+ h = iph->daddr.s6_addr32[3];
-+ hs = iph->saddr.s6_addr32[3];
-+ nfm = skb->nfmark;
-+ h2 = hs^iph->nexthdr;
++ /* Hash ipv6 addresses into a u32. This isn't ideal,
++ * but the code is simple. */
++ info.dst = jhash2(iph->daddr.s6_addr32, 4, q->perturbation);
++ info.src = jhash2(iph->saddr.s6_addr32, 4, q->perturbation);
+ if (iph->nexthdr == IPPROTO_TCP ||
+ iph->nexthdr == IPPROTO_UDP ||
+ iph->nexthdr == IPPROTO_SCTP ||
+ iph->nexthdr == IPPROTO_DCCP ||
+ iph->nexthdr == IPPROTO_ESP)
-+ h2 ^= *(u32*)&iph[1];
++ info.proto = *(u32*)&iph[1];
++ else
++ info.proto = iph->nexthdr;
+ break;
+ }
+ default:
-+ h = (u32)(unsigned long)skb->dst;
-+ hs = (u32)(unsigned long)skb->sk;
-+ nfm = skb->nfmark;
-+ h2 = hs^skb->protocol;
++ info.dst = (u32)(unsigned long)skb->dst;
++ info.src = (u32)(unsigned long)skb->sk;
++ info.proto = skb->protocol;
++ }
++
++ info.mark = skb->mark;
++
++#ifdef CONFIG_NF_CONNTRACK_ENABLED
++ /* defaults if there is no conntrack info */
++ info.ctorigsrc = info.src;
++ info.ctorigdst = info.dst;
++ info.ctreplsrc = info.dst;
++ info.ctrepldst = info.src;
++ /* collect conntrack info */
++ if (ct && ct != &nf_conntrack_untracked) {
++ if (skb->protocol == __constant_htons(ETH_P_IP)) {
++ info.ctorigsrc = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
++ info.ctorigdst = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip;
++ info.ctreplsrc = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip;
++ info.ctrepldst = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip;
++ }
++ else if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
++ /* Again, hash ipv6 addresses into a single u32. */
++ info.ctorigsrc = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6, 4, q->perturbation);
++ info.ctorigdst = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip6, 4, q->perturbation);
++ info.ctreplsrc = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6, 4, q->perturbation);
++ info.ctrepldst = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6, 4, q->perturbation);
++ }
++
+ }
++#endif
++
+ switch(q->hash_kind)
+ {
+ case TCA_SFQ_HASH_CLASSIC:
-+ return esfq_fold_hash_classic(q, h, h2);
++ return esfq_jhash_3words(q, info.dst, info.src, info.proto);
+ case TCA_SFQ_HASH_DST:
-+ return esfq_hash_u32(q,h);
++ return esfq_jhash_1word(q, info.dst);
+ case TCA_SFQ_HASH_DSTDIR:
-+ return esfq_hash_direct(q, ntohl(h));
++ return esfq_hash_direct(q, ntohl(info.dst));
+ case TCA_SFQ_HASH_SRC:
-+ return esfq_hash_u32(q,hs);
++ return esfq_jhash_1word(q, info.src);
+ case TCA_SFQ_HASH_SRCDIR:
-+ return esfq_hash_direct(q, ntohl(hs));
-+#ifdef CONFIG_NETFILTER
++ return esfq_hash_direct(q, ntohl(info.src));
+ case TCA_SFQ_HASH_FWMARK:
-+ return esfq_hash_u32(q,nfm);
++ return esfq_jhash_1word(q, info.mark);
+ case TCA_SFQ_HASH_FWMARKDIR:
-+ return esfq_hash_direct(q,nfm);
++ return esfq_hash_direct(q, info.mark);
++#ifdef CONFIG_NF_CONNTRACK_ENABLED
++ case TCA_SFQ_HASH_CTORIGDST:
++ return esfq_jhash_1word(q, info.ctorigdst);
++ case TCA_SFQ_HASH_CTORIGSRC:
++ return esfq_jhash_1word(q, info.ctorigsrc);
++ case TCA_SFQ_HASH_CTREPLDST:
++ return esfq_jhash_1word(q, info.ctrepldst);
++ case TCA_SFQ_HASH_CTREPLSRC:
++ return esfq_jhash_1word(q, info.ctreplsrc);
+#endif
+ default:
+ if (net_ratelimit())
+ printk(KERN_WARNING "ESFQ: Unknown hash method. Falling back to classic.\n");
+ }
-+ return esfq_fold_hash_classic(q, h, h2);
++ return esfq_jhash_3words(q, info.dst, info.src, info.proto);
+}
+
+static inline void esfq_link(struct esfq_sched_data *q, esfq_index x)
+ esfq_dec(q, x);
+ sch->q.qlen--;
+ sch->qstats.drops++;
++ sch->qstats.backlog -= len;
+ return len;
+ }
+
+ sch->q.qlen--;
+ q->ht[q->hash[d]] = q->depth;
+ sch->qstats.drops++;
++ sch->qstats.backlog -= len;
+ return len;
+ }
+
+ q->ht[hash] = x = q->dep[depth].next;
+ q->hash[x] = hash;
+ }
++ sch->qstats.backlog += skb->len;
+ __skb_queue_tail(&q->qs[x], skb);
+ esfq_inc(q, x);
+ if (q->qs[x].qlen == 1) { /* The flow is new */
+ q->ht[hash] = x = q->dep[depth].next;
+ q->hash[x] = hash;
+ }
++ sch->qstats.backlog += skb->len;
+ __skb_queue_head(&q->qs[x], skb);
+ esfq_inc(q, x);
+ if (q->qs[x].qlen == 1) { /* The flow is new */
+ skb = __skb_dequeue(&q->qs[a]);
+ esfq_dec(q, a);
+ sch->q.qlen--;
++ sch->qstats.backlog -= skb->len;
+
+ /* Is the slot empty? */
+ if (q->qs[a].qlen == 0) {
+
+ if (ctl->hash_kind) {
+ q->hash_kind = ctl->hash_kind;
-+ if (q->hash_kind != TCA_SFQ_HASH_CLASSIC)
++ if (q->hash_kind != TCA_SFQ_HASH_CLASSIC)
+ q->perturb_period = 0;
+ }
+
+{
+ struct esfq_sched_data *q = qdisc_priv(sch);
+ struct tc_esfq_qopt *ctl;
-+ esfq_index p = ~0UL/2;
++ esfq_index p = ~0U/2;
+ int i;
+
+ if (opt && opt->rta_len < RTA_LENGTH(sizeof(*ctl)))