1 --- linux/net/sched/sch_htb.c 2003-10-14 01:09:35.000000000 -0700
2 +++ linux.2.4.26/net/sched/sch_htb.c 2004-05-10 00:05:51.000000000 -0700
4 * Authors: Martin Devera, <devik@cdi.cz>
6 * Credits (in time order) for older HTB versions:
7 + * Stef Coene <stef.coene@docum.org>
8 + * HTB support at LARTC mailing list
9 * Ondrej Kraus, <krauso@barr.cz>
10 * found missing INIT_QDISC(htb)
11 * Vladimir Smelhaus, Aamer Akhter, Bert Hubert
13 * code review and helpful comments on shaping
14 * Tomasz Wrona, <tw@eter.tym.pl>
15 * created test case so that I was able to fix nasty bug
16 + * Wilfried Weissmann
17 + * spotted bug in dequeue code and helped with fix
19 + * fixed requeue routine
20 * and many others. thanks.
22 - * $Id: sch_htb.c,v 1.1.1.4 2003/10/14 08:09:35 sparq Exp $
23 + * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $
25 #include <linux/config.h>
26 #include <linux/module.h>
28 #define HTB_HYSTERESIS 1/* whether to use mode hysteresis for speedup */
29 #define HTB_QLOCK(S) spin_lock_bh(&(S)->dev->queue_lock)
30 #define HTB_QUNLOCK(S) spin_unlock_bh(&(S)->dev->queue_lock)
31 -#define HTB_VER 0x30007 /* major must be matched with number suplied by TC as version */
32 +#define HTB_VER 0x30010 /* major must be matched with number suplied by TC as version */
34 #if HTB_VER >> 16 != TC_HTB_PROTOVER
35 #error "Mismatched sch_htb.c and pkt_sch.h"
38 -/* temporary debug defines to be removed after beta stage */
39 -#define DEVIK_MEND(N)
40 -#define DEVIK_MSTART(N)
42 /* debugging support; S is subsystem, these are defined:
49 -#define HTB_DBG(S,L,FMT,ARG...) if (((q->debug>>(2*S))&3) >= L) \
50 +#define HTB_DBG_COND(S,L) (((q->debug>>(2*S))&3) >= L)
51 +#define HTB_DBG(S,L,FMT,ARG...) if (HTB_DBG_COND(S,L)) \
52 printk(KERN_DEBUG FMT,##ARG)
53 #define HTB_CHCL(cl) BUG_TRAP((cl)->magic == HTB_CMAGIC)
55 #define HTB_ARGQ struct htb_sched *q,
61 #define HTB_CMAGIC 0xFEFAFEF1
62 #define htb_safe_rb_erase(N,R) do { BUG_TRAP((N)->rb_color != -1); \
65 (N)->rb_color = -1; } while (0)
67 +#define HTB_DBG_COND(S,L) (0)
68 #define HTB_DBG(S,L,FMT,ARG...)
72 /* time of nearest event per level (row) */
73 unsigned long near_ev_cache[TC_HTB_MAXDEPTH];
75 + /* cached value of jiffies in dequeue */
76 + unsigned long jiffies;
78 /* whether we hit non-work conserving class during this dequeue; we use */
79 int nwc_hit; /* this to disable mindelay complaint in dequeue */
83 if (skb->priority == sch->handle)
84 return HTB_DIRECT; /* X:0 (direct flow) selected */
85 - if ((cl = htb_find(skb->priority,sch)) != NULL)
86 + if ((cl = htb_find(skb->priority,sch)) != NULL && cl->level == 0)
91 static void htb_debug_dump (struct htb_sched *q)
94 - printk(KERN_DEBUG "htb*g j=%lu\n",jiffies);
95 + printk(KERN_DEBUG "htb*g j=%lu lj=%lu\n",jiffies,q->jiffies);
97 for (i=TC_HTB_MAXDEPTH-1;i>=0;i--) {
98 printk(KERN_DEBUG "htb*r%d m=%x",i,q->row_mask[i]);
100 if ((delay <= 0 || delay > cl->mbuffer) && net_ratelimit())
101 printk(KERN_ERR "HTB: suspicious delay in wait_tree d=%ld cl=%X h=%d\n",delay,cl->classid,debug_hint);
104 - cl->pq_key = jiffies + PSCHED_US2JIFFIE(delay);
105 - if (cl->pq_key == jiffies)
106 + cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay);
107 + if (cl->pq_key == q->jiffies)
110 /* update the nearest event cache */
111 - if (q->near_ev_cache[cl->level] - cl->pq_key < 0x80000000)
112 + if (time_after(q->near_ev_cache[cl->level], cl->pq_key))
113 q->near_ev_cache[cl->level] = cl->pq_key;
116 struct htb_class *c; parent = *p;
117 c = rb_entry(parent, struct htb_class, pq_node);
118 - if (cl->pq_key - c->pq_key < 0x80000000)
119 + if (time_after_eq(cl->pq_key, c->pq_key))
120 p = &parent->rb_right;
122 p = &parent->rb_left;
124 rb_link_node(&cl->pq_node, parent, p);
125 rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
130 @@ -453,12 +460,14 @@
133 if ((*n)->rb_right) {
134 + /* child at right. use it or its leftmost descendant */
136 while ((*n)->rb_left)
140 while ((p = (*n)->rb_parent) != NULL) {
141 + /* if we've arrived from left child then we have next node */
142 if (p->rb_left == *n) break;
148 if ((toks = (cl->ctokens + *diff)) < (
149 -#ifdef HTB_HYSTERESIS
151 cl->cmode != HTB_CANT_SEND ? -cl->cbuffer :
155 return HTB_CANT_SEND;
157 if ((toks = (cl->tokens + *diff)) >= (
158 -#ifdef HTB_HYSTERESIS
160 cl->cmode == HTB_CAN_SEND ? -cl->buffer :
164 struct htb_sched *q = (struct htb_sched *)sch->data;
165 struct htb_class *cl = htb_classify(skb,sch);
168 if (cl == HTB_DIRECT || !cl) {
169 /* enqueue to helper queue */
170 if (q->direct_queue.qlen < q->direct_qlen && cl) {
171 @@ -698,25 +706,20 @@
176 return NET_XMIT_DROP;
178 } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
182 return NET_XMIT_DROP;
184 cl->stats.packets++; cl->stats.bytes += skb->len;
191 sch->stats.packets++; sch->stats.bytes += skb->len;
192 - HTB_DBG(1,1,"htb_enq_ok cl=%X skb=%p\n",cl?cl->classid:0,skb);
194 + HTB_DBG(1,1,"htb_enq_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb);
195 return NET_XMIT_SUCCESS;
198 @@ -725,16 +728,18 @@
200 struct htb_sched *q = (struct htb_sched *)sch->data;
201 struct htb_class *cl = htb_classify(skb,sch);
202 + struct sk_buff *tskb;
204 if (cl == HTB_DIRECT || !cl) {
205 /* enqueue to helper queue */
206 if (q->direct_queue.qlen < q->direct_qlen && cl) {
207 - __skb_queue_tail(&q->direct_queue, skb);
209 + __skb_queue_head(&q->direct_queue, skb);
212 - sch->stats.drops++;
213 - return NET_XMIT_DROP;
214 + __skb_queue_head(&q->direct_queue, skb);
215 + tskb = __skb_dequeue_tail(&q->direct_queue);
217 + sch->stats.drops++;
218 + return NET_XMIT_CN;
220 } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
226 - HTB_DBG(1,1,"htb_req_ok cl=%X skb=%p\n",cl?cl->classid:0,skb);
227 + HTB_DBG(1,1,"htb_req_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb);
228 return NET_XMIT_SUCCESS;
233 (unsigned long long) q->now,
234 (unsigned long long) cl->t_c,
242 * Scans event queue for pending events and applies them. Returns jiffies to
243 * next pending event (0 for no event in pq).
244 + * Note: events applied are those that have cl->pq_key <= jiffies.
246 static long htb_do_events(struct htb_sched *q,int level)
249 while (p->rb_left) p = p->rb_left;
251 cl = rb_entry(p, struct htb_class, pq_node);
252 - if (cl->pq_key - (jiffies+1) < 0x80000000) {
253 - HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - jiffies);
254 - return cl->pq_key - jiffies;
255 + if (time_after(cl->pq_key, q->jiffies)) {
256 + HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - q->jiffies);
257 + return cl->pq_key - q->jiffies;
259 htb_safe_rb_erase(p,q->wait_pq+level);
260 diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer, 0);
263 (unsigned long long) q->now,
264 (unsigned long long) cl->t_c,
272 } stk[TC_HTB_MAXDEPTH],*sp = stk;
274 + BUG_TRAP(tree->rb_node);
275 sp->root = tree->rb_node;
278 @@ -949,16 +956,36 @@
279 htb_dequeue_tree(struct htb_sched *q,int prio,int level)
281 struct sk_buff *skb = NULL;
282 - //struct htb_sched *q = (struct htb_sched *)sch->data;
283 struct htb_class *cl,*start;
284 /* look initial class up in the row */
286 start = cl = htb_lookup_leaf (q->row[level]+prio,prio,q->ptr[level]+prio);
289 - BUG_TRAP(cl && cl->un.leaf.q->q.qlen); if (!cl) return NULL;
292 + if (!cl) return NULL;
293 HTB_DBG(4,1,"htb_deq_tr prio=%d lev=%d cl=%X defic=%d\n",
294 prio,level,cl->classid,cl->un.leaf.deficit[level]);
296 + /* class can be empty - it is unlikely but can be true if leaf
297 + qdisc drops packets in enqueue routine or if someone used
298 + graft operation on the leaf since last dequeue;
299 + simply deactivate and skip such class */
300 + if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
301 + struct htb_class *next;
302 + htb_deactivate(q,cl);
304 + /* row/level might become empty */
305 + if ((q->row_mask[level] & (1 << prio)) == 0)
308 + next = htb_lookup_leaf (q->row[level]+prio,
309 + prio,q->ptr[level]+prio);
310 + if (cl == start) /* fix start if we just deleted it */
316 if (likely((skb = cl->un.leaf.q->dequeue(cl->un.leaf.q)) != NULL))
319 cl = htb_lookup_leaf (q->row[level]+prio,prio,q->ptr[level]+prio);
320 } while (cl != start);
324 if (likely(skb != NULL)) {
325 if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
326 HTB_DBG(4,2,"htb_next_cl oldptr=%p quant_add=%d\n",
327 @@ -984,11 +1009,8 @@
328 gives us slightly better performance */
329 if (!cl->un.leaf.q->q.qlen)
330 htb_deactivate (q,cl);
332 htb_charge_class (q,cl,level,skb->len);
339 @@ -1002,9 +1024,8 @@
340 printk(KERN_INFO "HTB delay %ld > 5sec\n", delay);
343 - del_timer(&q->timer);
344 - q->timer.expires = jiffies + delay;
345 - add_timer(&q->timer);
346 + /* why not use jiffies here? because expires can be in the past */
347 + mod_timer(&q->timer, q->jiffies + delay);
348 sch->flags |= TCQ_F_THROTTLED;
349 sch->stats.overlimits++;
350 HTB_DBG(3,1,"htb_deq t_delay=%ld\n",delay);
351 @@ -1016,7 +1037,11 @@
352 struct htb_sched *q = (struct htb_sched *)sch->data;
359 + q->jiffies = jiffies;
360 HTB_DBG(3,1,"htb_deq dircnt=%d qlen=%d\n",skb_queue_len(&q->direct_queue),
363 @@ -1027,27 +1052,26 @@
368 if (!sch->q.qlen) goto fin;
369 PSCHED_GET_TIME(q->now);
372 + min_delay = LONG_MAX;
374 for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
375 /* common case optimization - skip event handler quickly */
379 - if (jiffies - q->near_ev_cache[level] < 0x80000000 || 0) {
380 + if (time_after_eq(q->jiffies, q->near_ev_cache[level])) {
381 delay = htb_do_events(q,level);
382 - q->near_ev_cache[level] += delay ? delay : HZ;
383 + q->near_ev_cache[level] = q->jiffies + (delay ? delay : HZ);
388 - delay = q->near_ev_cache[level] - jiffies;
389 + delay = q->near_ev_cache[level] - q->jiffies;
391 if (delay && min_delay > delay)
395 m = ~q->row_mask[level];
396 while (m != (int)(-1)) {
398 @@ -1056,29 +1080,29 @@
399 if (likely(skb != NULL)) {
401 sch->flags &= ~TCQ_F_THROTTLED;
410 - if (!q->nwc_hit && min_delay >= 5*HZ && net_ratelimit()) {
411 - printk(KERN_ERR "HTB: mindelay=%ld, report it please !\n",min_delay);
413 + if (!q->nwc_hit && min_delay >= 10*HZ && net_ratelimit()) {
414 + if (min_delay == LONG_MAX) {
415 + printk(KERN_ERR "HTB: dequeue bug (%d,%lu,%lu), report it please !\n",
416 + evs_used,q->jiffies,jiffies);
419 + printk(KERN_WARNING "HTB: mindelay=%ld, some class has "
420 + "too small rate\n",min_delay);
423 - htb_delay_by (sch,min_delay);
425 + htb_delay_by (sch,min_delay > 5*HZ ? 5*HZ : min_delay);
427 - HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,jiffies,skb);
429 + HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,q->jiffies,skb);
433 /* try to drop from each class (by prio) until one succeed */
434 -static int htb_drop(struct Qdisc* sch)
435 +static unsigned int htb_drop(struct Qdisc* sch)
437 struct htb_sched *q = (struct htb_sched *)sch->data;
439 @@ -1086,14 +1110,15 @@
440 for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
442 list_for_each (p,q->drops+prio) {
443 - struct htb_class *cl = list_entry(p,struct htb_class,
444 - un.leaf.drop_list);
445 + struct htb_class *cl = list_entry(p, struct htb_class,
446 + un.leaf.drop_list);
448 if (cl->un.leaf.q->ops->drop &&
449 - cl->un.leaf.q->ops->drop(cl->un.leaf.q)) {
450 + (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
452 if (!cl->un.leaf.q->q.qlen)
453 htb_deactivate (q,cl);
459 @@ -1208,7 +1233,8 @@
460 gopt.direct_pkts = q->direct_pkts;
464 + if (HTB_DBG_COND(0,2))
467 gopt.version = HTB_VER;
468 gopt.rate2quantum = q->rate2quantum;
469 @@ -1289,6 +1315,9 @@
472 if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
473 + if (cl->prio_activity)
474 + htb_deactivate ((struct htb_sched*)sch->data,cl);
476 /* TODO: is it correct ? Why CBQ doesn't do it ? */
477 sch->q.qlen -= (*old)->q.qlen;
479 @@ -1323,7 +1352,7 @@
481 while ((tp = *fl) != NULL) {
483 - tp->ops->destroy(tp);
488 @@ -1371,11 +1400,16 @@
490 del_timer_sync (&q->rttim);
492 + /* This line used to be after htb_destroy_class call below
493 + and surprisingly it worked in 2.4. But it must precede it
494 + because a filter needs its target class alive to be able to call
495 + unbind_filter on it (without Oops). */
496 + htb_destroy_filters(&q->filter_list);
498 while (!list_empty(&q->root))
499 htb_destroy_class (sch,list_entry(q->root.next,
500 struct htb_class,sibling));
502 - htb_destroy_filters(&q->filter_list);
503 __skb_queue_purge(&q->direct_queue);
506 @@ -1438,12 +1472,13 @@
507 parent = parentid == TC_H_ROOT ? NULL : htb_find (parentid,sch);
509 hopt = RTA_DATA(tb[TCA_HTB_PARMS-1]);
510 - HTB_DBG(0,1,"htb_chg cl=%p, clid=%X, opt/prio=%d, rate=%u, buff=%d, quant=%d\n", cl,cl?cl->classid:0,(int)hopt->prio,hopt->rate.rate,hopt->buffer,hopt->quantum);
511 + HTB_DBG(0,1,"htb_chg cl=%p(%X), clid=%X, parid=%X, opt/prio=%d, rate=%u, buff=%d, quant=%d\n", cl,cl?cl->classid:0,classid,parentid,(int)hopt->prio,hopt->rate.rate,hopt->buffer,hopt->quantum);
512 rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB-1]);
513 ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB-1]);
514 if (!rtab || !ctab) goto failure;
516 if (!cl) { /* new class */
517 + struct Qdisc *new_q;
518 /* check for valid classid */
519 if (!classid || TC_H_MAJ(classid^sch->handle) || htb_find(classid,sch))
521 @@ -1467,6 +1502,10 @@
522 cl->magic = HTB_CMAGIC;
525 + /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
526 + so that can't be used inside of sch_tree_lock
527 + -- thanks to Karlis Peisenieks */
528 + new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
530 if (parent && !parent->level) {
531 /* turn parent into inner node */
532 @@ -1485,8 +1524,7 @@
533 memset (&parent->un.inner,0,sizeof(parent->un.inner));
535 /* leaf (we) needs elementary qdisc */
536 - if (!(cl->un.leaf.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
537 - cl->un.leaf.q = &noop_qdisc;
538 + cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
540 cl->classid = classid; cl->parent = parent;
542 @@ -1514,11 +1552,11 @@
544 cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
545 if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
546 - printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.", cl->classid);
547 + printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.\n", cl->classid);
548 cl->un.leaf.quantum = 1000;
550 if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
551 - printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.", cl->classid);
552 + printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.\n", cl->classid);
553 cl->un.leaf.quantum = 200000;
556 --- linux/include/net/pkt_cls.h 2003-07-04 01:12:28.000000000 -0700
557 +++ linux.2.4.26/include/net/pkt_cls.h 2004-05-10 22:21:40.000000000 -0700
563 +static inline void tcf_destroy(struct tcf_proto *tp)
565 + tp->ops->destroy(tp);
569 extern int register_tcf_proto_ops(struct tcf_proto_ops *ops);
570 extern int unregister_tcf_proto_ops(struct tcf_proto_ops *ops);