X-Git-Url: http://git.rohieb.name/openwrt.git/blobdiff_plain/c92885c466d11dc3b8e2837a8be1fd3b6b7e2e01..2684b5ae8d13e55be5081b0a4c4546295ead3d65:/target/linux/generic-2.6/patches/200-sched_esfq.patch diff --git a/target/linux/generic-2.6/patches/200-sched_esfq.patch b/target/linux/generic-2.6/patches/200-sched_esfq.patch index 6830b833a..ee4307474 100644 --- a/target/linux/generic-2.6/patches/200-sched_esfq.patch +++ b/target/linux/generic-2.6/patches/200-sched_esfq.patch @@ -1,7 +1,7 @@ -diff -urN linux-2.6.19.old/include/linux/pkt_sched.h linux-2.6.19.dev/include/linux/pkt_sched.h ---- linux-2.6.19.old/include/linux/pkt_sched.h 2006-11-29 22:57:37.000000000 +0100 -+++ linux-2.6.19.dev/include/linux/pkt_sched.h 2006-12-14 03:13:51.000000000 +0100 -@@ -146,8 +146,35 @@ +diff -urN linux-2.6.21.1.old/include/linux/pkt_sched.h linux-2.6.21.1.dev/include/linux/pkt_sched.h +--- linux-2.6.21.1.old/include/linux/pkt_sched.h 2007-04-27 23:49:26.000000000 +0200 ++++ linux-2.6.21.1.dev/include/linux/pkt_sched.h 2007-05-26 20:43:12.530587320 +0200 +@@ -146,8 +146,40 @@ * * The only reason for this is efficiency, it is possible * to change these parameters in compile time. 
@@ -22,6 +22,11 @@ diff -urN linux-2.6.19.old/include/linux/pkt_sched.h linux-2.6.19.dev/include/li + TCA_SFQ_HASH_DSTDIR, + TCA_SFQ_HASH_SRCDIR, + TCA_SFQ_HASH_FWMARKDIR, ++ /* conntrack */ ++ TCA_SFQ_HASH_CTORIGDST, ++ TCA_SFQ_HASH_CTORIGSRC, ++ TCA_SFQ_HASH_CTREPLDST, ++ TCA_SFQ_HASH_CTREPLSRC, +}; + +struct tc_esfq_qopt @@ -37,31 +42,29 @@ diff -urN linux-2.6.19.old/include/linux/pkt_sched.h linux-2.6.19.dev/include/li /* RED section */ enum -diff -urN linux-2.6.19.old/net/sched/Kconfig linux-2.6.19.dev/net/sched/Kconfig ---- linux-2.6.19.old/net/sched/Kconfig 2006-11-29 22:57:37.000000000 +0100 -+++ linux-2.6.19.dev/net/sched/Kconfig 2006-12-14 03:13:51.000000000 +0100 -@@ -185,6 +185,28 @@ +diff -urN linux-2.6.21.1.old/net/sched/Kconfig linux-2.6.21.1.dev/net/sched/Kconfig +--- linux-2.6.21.1.old/net/sched/Kconfig 2007-04-27 23:49:26.000000000 +0200 ++++ linux-2.6.21.1.dev/net/sched/Kconfig 2007-05-26 20:43:12.572580936 +0200 +@@ -189,6 +189,26 @@ To compile this code as a module, choose M here: the module will be called sch_sfq. +config NET_SCH_ESFQ -+ tristate "ESFQ queue" -+ depends on NET_SCHED ++ tristate "Enhanced Stochastic Fairness Queueing (ESFQ)" + ---help--- + Say Y here if you want to use the Enhanced Stochastic Fairness + Queueing (ESFQ) packet scheduling algorithm for some of your network + devices or as a leaf discipline for a classful qdisc such as HTB or + CBQ (see the top of <file:net/sched/sch_esfq.c> for details and + references to the SFQ algorithm). -+ ++ + This is an enhanced SFQ version which allows you to control some -+ hardcoded values in the SFQ scheduler: queue depth, hash table size, -+ and queues limit. -+ -+ ESFQ also adds control to the hash function used to identify packet -+ flows. The original SFQ hashes by individual flow (TCP session or UDP -+ stream); ESFQ can hash by src or dst IP as well, which can be more -+ fair to users in some networking situations. ++ hardcoded values in the SFQ scheduler. 
++ ++ ESFQ also adds control of the hash function used to identify packet ++ flows. The original SFQ discipline hashes by connection; ESFQ adds ++ several other hashing methods, such as by src IP or by dst IP, which ++ can be more fair to users in some networking situations. + + To compile this code as a module, choose M here: the + module will be called sch_esfq. @@ -69,9 +72,9 @@ diff -urN linux-2.6.19.old/net/sched/Kconfig linux-2.6.19.dev/net/sched/Kconfig config NET_SCH_TEQL tristate "True Link Equalizer (TEQL)" ---help--- -diff -urN linux-2.6.19.old/net/sched/Makefile linux-2.6.19.dev/net/sched/Makefile ---- linux-2.6.19.old/net/sched/Makefile 2006-11-29 22:57:37.000000000 +0100 -+++ linux-2.6.19.dev/net/sched/Makefile 2006-12-14 03:13:51.000000000 +0100 +diff -urN linux-2.6.21.1.old/net/sched/Makefile linux-2.6.21.1.dev/net/sched/Makefile +--- linux-2.6.21.1.old/net/sched/Makefile 2007-04-27 23:49:26.000000000 +0200 ++++ linux-2.6.21.1.dev/net/sched/Makefile 2007-05-26 20:43:12.577580176 +0200 @@ -23,6 +23,7 @@ obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o @@ -80,10 +83,10 @@ diff -urN linux-2.6.19.old/net/sched/Makefile linux-2.6.19.dev/net/sched/Makefil obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o -diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_esfq.c ---- linux-2.6.19.old/net/sched/sch_esfq.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.19.dev/net/sched/sch_esfq.c 2006-12-14 03:13:51.000000000 +0100 -@@ -0,0 +1,644 @@ +diff -urN linux-2.6.21.1.old/net/sched/sch_esfq.c linux-2.6.21.1.dev/net/sched/sch_esfq.c +--- linux-2.6.21.1.old/net/sched/sch_esfq.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.21.1.dev/net/sched/sch_esfq.c 2007-05-26 20:43:12.578580024 +0200 +@@ -0,0 +1,704 @@ +/* + * net/sched/sch_esfq.c Extended Stochastic Fairness Queueing discipline. 
+ * @@ -103,12 +106,12 @@ diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_e + * + * Corey Hickey, + * Maintenance of the Linux 2.6 port. -+ * Added fwmark hash (thanks to Robert Kurjata) ++ * Added fwmark hash (thanks to Robert Kurjata). + * Added direct hashing for src, dst, and fwmark. ++ * Added usage of jhash. + * + */ + -+#include +#include +#include +#include @@ -135,12 +138,16 @@ diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_e +#include +#include +#include ++#include + ++#ifdef CONFIG_NF_CONNTRACK_ENABLED ++#include ++#endif + +/* Stochastic Fairness Queuing algorithm. + For more comments look at sch_sfq.c. + The difference is that you can change limit, depth, -+ hash table size and choose 7 hash types. ++ hash table size and choose alternate hash types. + + classic: same as in sch_sfq.c + dst: destination IP address @@ -149,9 +156,11 @@ diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_e + dst_direct: + src_direct: + fwmark_direct: direct hashing of the above sources ++ ctorigdst: original destination IP address ++ ctorigsrc: original source IP address ++ ctrepldst: reply destination IP address ++ ctreplsrc: reply source IP + -+ TODO: -+ make sfq_change work. +*/ + + @@ -190,20 +199,24 @@ diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_e + unsigned dyn_range; /* saved range */ +}; + -+static __inline__ unsigned esfq_hash_u32(struct esfq_sched_data *q,u32 h) ++/* This contains the info we will hash. 
*/ ++struct esfq_packet_info +{ -+ int pert = q->perturbation; -+ -+ if (pert) -+ h = (h<>(0x1F - pert)); -+ -+ h = ntohl(h) * 2654435761UL; -+ return h & (q->hash_divisor-1); -+} ++ u32 proto; /* protocol or port */ ++ u32 src; /* source from packet header */ ++ u32 dst; /* destination from packet header */ ++ u32 ctorigsrc; /* original source from conntrack */ ++ u32 ctorigdst; /* original destination from conntrack */ ++ u32 ctreplsrc; /* reply source from conntrack */ ++ u32 ctrepldst; /* reply destination from conntrack */ ++ u32 mark; /* netfilter mark (fwmark) */ ++}; + +/* Hash input values directly into the "nearest" slot, taking into account the + * range of input values seen. This is most useful when the hash table is at -+ * least as large as the range of possible values. */ ++ * least as large as the range of possible values. ++ * Note: this functionality was added before the change to using jhash, and may ++ * no longer be useful. */ +static __inline__ unsigned esfq_hash_direct(struct esfq_sched_data *q, u32 h) +{ + /* adjust minimum and maximum */ @@ -224,83 +237,128 @@ diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_e + return (h - q->dyn_min) * (q->hash_divisor - 1) / q->dyn_range; +} + -+static __inline__ unsigned esfq_fold_hash_classic(struct esfq_sched_data *q, u32 h, u32 h1) ++static __inline__ unsigned esfq_jhash_1word(struct esfq_sched_data *q,u32 a) +{ -+ int pert = q->perturbation; ++ return jhash_1word(a, q->perturbation) & (q->hash_divisor-1); ++} + -+ /* Have we any rotation primitives? If not, WHY? 
*/ -+ h ^= (h1<>(0x1F - pert)); -+ h ^= h>>10; -+ return h & (q->hash_divisor-1); ++static __inline__ unsigned esfq_jhash_2words(struct esfq_sched_data *q, u32 a, u32 b) ++{ ++ return jhash_2words(a, b, q->perturbation) & (q->hash_divisor-1); +} + -+static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb) ++static __inline__ unsigned esfq_jhash_3words(struct esfq_sched_data *q, u32 a, u32 b, u32 c) +{ -+ u32 h, h2; -+ u32 hs; -+ u32 nfm; ++ return jhash_3words(a, b, c, q->perturbation) & (q->hash_divisor-1); ++} + ++ ++static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb) ++{ ++ struct esfq_packet_info info; ++#ifdef CONFIG_NF_CONNTRACK_ENABLED ++ enum ip_conntrack_info ctinfo; ++ struct nf_conn *ct = nf_ct_get(skb, &ctinfo); ++#endif ++ + switch (skb->protocol) { + case __constant_htons(ETH_P_IP): + { + struct iphdr *iph = skb->nh.iph; -+ h = iph->daddr; -+ hs = iph->saddr; -+ nfm = skb->nfmark; -+ h2 = hs^iph->protocol; ++ info.dst = iph->daddr; ++ info.src = iph->saddr; + if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && + (iph->protocol == IPPROTO_TCP || + iph->protocol == IPPROTO_UDP || + iph->protocol == IPPROTO_SCTP || + iph->protocol == IPPROTO_DCCP || + iph->protocol == IPPROTO_ESP)) -+ h2 ^= *(((u32*)iph) + iph->ihl); ++ info.proto = *(((u32*)iph) + iph->ihl); ++ else ++ info.proto = iph->protocol; + break; + } + case __constant_htons(ETH_P_IPV6): + { + struct ipv6hdr *iph = skb->nh.ipv6h; -+ h = iph->daddr.s6_addr32[3]; -+ hs = iph->saddr.s6_addr32[3]; -+ nfm = skb->nfmark; -+ h2 = hs^iph->nexthdr; ++ /* Hash ipv6 addresses into a u32. This isn't ideal, ++ * but the code is simple. 
*/ ++ info.dst = jhash2(iph->daddr.s6_addr32, 4, q->perturbation); ++ info.src = jhash2(iph->saddr.s6_addr32, 4, q->perturbation); + if (iph->nexthdr == IPPROTO_TCP || + iph->nexthdr == IPPROTO_UDP || + iph->nexthdr == IPPROTO_SCTP || + iph->nexthdr == IPPROTO_DCCP || + iph->nexthdr == IPPROTO_ESP) -+ h2 ^= *(u32*)&iph[1]; ++ info.proto = *(u32*)&iph[1]; ++ else ++ info.proto = iph->nexthdr; + break; + } + default: -+ h = (u32)(unsigned long)skb->dst; -+ hs = (u32)(unsigned long)skb->sk; -+ nfm = skb->nfmark; -+ h2 = hs^skb->protocol; ++ info.dst = (u32)(unsigned long)skb->dst; ++ info.src = (u32)(unsigned long)skb->sk; ++ info.proto = skb->protocol; ++ } ++ ++ info.mark = skb->mark; ++ ++#ifdef CONFIG_NF_CONNTRACK_ENABLED ++ /* defaults if there is no conntrack info */ ++ info.ctorigsrc = info.src; ++ info.ctorigdst = info.dst; ++ info.ctreplsrc = info.dst; ++ info.ctrepldst = info.src; ++ /* collect conntrack info */ ++ if (ct && ct != &nf_conntrack_untracked) { ++ if (skb->protocol == __constant_htons(ETH_P_IP)) { ++ info.ctorigsrc = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; ++ info.ctorigdst = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip; ++ info.ctreplsrc = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip; ++ info.ctrepldst = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip; ++ } ++ else if (skb->protocol == __constant_htons(ETH_P_IPV6)) { ++ /* Again, hash ipv6 addresses into a single u32. 
*/ ++ info.ctorigsrc = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6, 4, q->perturbation); ++ info.ctorigdst = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip6, 4, q->perturbation); ++ info.ctreplsrc = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6, 4, q->perturbation); ++ info.ctrepldst = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6, 4, q->perturbation); ++ } ++ + } ++#endif ++ + switch(q->hash_kind) + { + case TCA_SFQ_HASH_CLASSIC: -+ return esfq_fold_hash_classic(q, h, h2); ++ return esfq_jhash_3words(q, info.dst, info.src, info.proto); + case TCA_SFQ_HASH_DST: -+ return esfq_hash_u32(q,h); ++ return esfq_jhash_1word(q, info.dst); + case TCA_SFQ_HASH_DSTDIR: -+ return esfq_hash_direct(q, ntohl(h)); ++ return esfq_hash_direct(q, ntohl(info.dst)); + case TCA_SFQ_HASH_SRC: -+ return esfq_hash_u32(q,hs); ++ return esfq_jhash_1word(q, info.src); + case TCA_SFQ_HASH_SRCDIR: -+ return esfq_hash_direct(q, ntohl(hs)); -+#ifdef CONFIG_NETFILTER ++ return esfq_hash_direct(q, ntohl(info.src)); + case TCA_SFQ_HASH_FWMARK: -+ return esfq_hash_u32(q,nfm); ++ return esfq_jhash_1word(q, info.mark); + case TCA_SFQ_HASH_FWMARKDIR: -+ return esfq_hash_direct(q,nfm); ++ return esfq_hash_direct(q, info.mark); ++#ifdef CONFIG_NF_CONNTRACK_ENABLED ++ case TCA_SFQ_HASH_CTORIGDST: ++ return esfq_jhash_1word(q, info.ctorigdst); ++ case TCA_SFQ_HASH_CTORIGSRC: ++ return esfq_jhash_1word(q, info.ctorigsrc); ++ case TCA_SFQ_HASH_CTREPLDST: ++ return esfq_jhash_1word(q, info.ctrepldst); ++ case TCA_SFQ_HASH_CTREPLSRC: ++ return esfq_jhash_1word(q, info.ctreplsrc); +#endif + default: + if (net_ratelimit()) + printk(KERN_WARNING "ESFQ: Unknown hash method. 
Falling back to classic.\n"); + } -+ return esfq_fold_hash_classic(q, h, h2); ++ return esfq_jhash_3words(q, info.dst, info.src, info.proto); +} + +static inline void esfq_link(struct esfq_sched_data *q, esfq_index x) @@ -365,6 +423,7 @@ diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_e + esfq_dec(q, x); + sch->q.qlen--; + sch->qstats.drops++; ++ sch->qstats.backlog -= len; + return len; + } + @@ -381,6 +440,7 @@ diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_e + sch->q.qlen--; + q->ht[q->hash[d]] = q->depth; + sch->qstats.drops++; ++ sch->qstats.backlog -= len; + return len; + } + @@ -400,6 +460,7 @@ diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_e + q->ht[hash] = x = q->dep[depth].next; + q->hash[x] = hash; + } ++ sch->qstats.backlog += skb->len; + __skb_queue_tail(&q->qs[x], skb); + esfq_inc(q, x); + if (q->qs[x].qlen == 1) { /* The flow is new */ @@ -436,6 +497,7 @@ diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_e + q->ht[hash] = x = q->dep[depth].next; + q->hash[x] = hash; + } ++ sch->qstats.backlog += skb->len; + __skb_queue_head(&q->qs[x], skb); + esfq_inc(q, x); + if (q->qs[x].qlen == 1) { /* The flow is new */ @@ -480,6 +542,7 @@ diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_e + skb = __skb_dequeue(&q->qs[a]); + esfq_dec(q, a); + sch->q.qlen--; ++ sch->qstats.backlog -= skb->len; + + /* Is the slot empty? 
*/ + if (q->qs[a].qlen == 0) { @@ -542,7 +605,7 @@ diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_e + + if (ctl->hash_kind) { + q->hash_kind = ctl->hash_kind; -+ if (q->hash_kind != TCA_SFQ_HASH_CLASSIC) ++ if (q->hash_kind != TCA_SFQ_HASH_CLASSIC) + q->perturb_period = 0; + } + @@ -566,7 +629,7 @@ diff -urN linux-2.6.19.old/net/sched/sch_esfq.c linux-2.6.19.dev/net/sched/sch_e +{ + struct esfq_sched_data *q = qdisc_priv(sch); + struct tc_esfq_qopt *ctl; -+ esfq_index p = ~0UL/2; ++ esfq_index p = ~0U/2; + int i; + + if (opt && opt->rta_len < RTA_LENGTH(sizeof(*ctl)))