1 diff -ur v2.6.14/linux/include/linux/netfilter_ipv4/ip_nat.h linux/include/linux/netfilter_ipv4/ip_nat.h
2 --- v2.6.14/linux/include/linux/netfilter_ipv4/ip_nat.h 2005-10-29 14:15:09.000000000 +0300
3 +++ linux/include/linux/netfilter_ipv4/ip_nat.h 2005-10-29 18:11:32.885759304 +0300
8 +/* Call input routing for SNAT-ed traffic */
9 +extern unsigned int ip_nat_route_input(unsigned int hooknum,
10 + struct sk_buff **pskb,
11 + const struct net_device *in,
12 + const struct net_device *out,
13 + int (*okfn)(struct sk_buff *));
15 /* Set up the info structure to map into this range. */
16 extern unsigned int ip_nat_setup_info(struct ip_conntrack *conntrack,
17 const struct ip_nat_range *range,
18 diff -ur v2.6.14/linux/include/linux/rtnetlink.h linux/include/linux/rtnetlink.h
19 --- v2.6.14/linux/include/linux/rtnetlink.h 2005-10-29 14:15:09.000000000 +0300
20 +++ linux/include/linux/rtnetlink.h 2005-10-29 18:11:21.299520680 +0300
22 #define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */
23 #define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */
24 #define RTNH_F_ONLINK 4 /* Gateway is forced on link */
25 +#define RTNH_F_SUSPECT 8 /* We don't know the real state */
26 +#define RTNH_F_BADSTATE (RTNH_F_DEAD | RTNH_F_SUSPECT)
28 /* Macros to handle hexthops */
30 diff -ur v2.6.14/linux/include/net/flow.h linux/include/net/flow.h
31 --- v2.6.14/linux/include/net/flow.h 2005-06-18 08:50:52.000000000 +0300
32 +++ linux/include/net/flow.h 2005-10-29 18:11:32.885759304 +0300
43 #define fl4_dst nl_u.ip4_u.daddr
44 #define fl4_src nl_u.ip4_u.saddr
45 #define fl4_fwmark nl_u.ip4_u.fwmark
46 +#define fl4_lsrc nl_u.ip4_u.lsrc
47 +#define fl4_gw nl_u.ip4_u.gw
48 #define fl4_tos nl_u.ip4_u.tos
49 #define fl4_scope nl_u.ip4_u.scope
51 diff -ur v2.6.14/linux/include/net/ip_fib.h linux/include/net/ip_fib.h
52 --- v2.6.14/linux/include/net/ip_fib.h 2005-10-29 14:15:09.000000000 +0300
53 +++ linux/include/net/ip_fib.h 2005-10-29 18:11:21.300520528 +0300
56 static inline void fib_select_default(const struct flowi *flp, struct fib_result *res)
58 - if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
59 + if ((FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) ||
60 + FIB_RES_NH(*res).nh_scope == RT_SCOPE_HOST)
61 ip_fib_main_table->tb_select_default(ip_fib_main_table, flp, res);
65 extern int fib_lookup(const struct flowi *flp, struct fib_result *res);
66 extern struct fib_table *__fib_new_table(int id);
67 extern void fib_rule_put(struct fib_rule *r);
68 +extern int fib_result_table(struct fib_result *res);
70 static inline struct fib_table *fib_get_table(int id)
73 extern void fib_proc_exit(void);
76 +extern rwlock_t fib_nhflags_lock;
78 #endif /* _NET_FIB_H */
79 diff -ur v2.6.14/linux/include/net/route.h linux/include/net/route.h
80 --- v2.6.14/linux/include/net/route.h 2005-10-29 14:15:09.000000000 +0300
81 +++ linux/include/net/route.h 2005-10-29 18:11:32.885759304 +0300
83 extern int ip_route_output_key(struct rtable **, struct flowi *flp);
84 extern int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags);
85 extern int ip_route_input(struct sk_buff*, u32 dst, u32 src, u8 tos, struct net_device *devin);
86 +extern int ip_route_input_lookup(struct sk_buff*, u32 dst, u32 src, u8 tos, struct net_device *devin, u32 lsrc);
87 extern unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu);
88 extern void ip_rt_send_redirect(struct sk_buff *skb);
90 diff -ur v2.6.14/linux/net/ipv4/fib_frontend.c linux/net/ipv4/fib_frontend.c
91 --- v2.6.14/linux/net/ipv4/fib_frontend.c 2005-10-29 14:15:09.000000000 +0300
92 +++ linux/net/ipv4/fib_frontend.c 2005-10-29 18:11:21.300520528 +0300
94 struct fib_table *ip_fib_local_table;
95 struct fib_table *ip_fib_main_table;
97 +#define FIB_RES_TABLE(r) (RT_TABLE_MAIN)
101 #define RT_TABLE_MIN 1
106 +#define FIB_RES_TABLE(r) (fib_result_table(r))
108 #endif /* CONFIG_IP_MULTIPLE_TABLES */
113 struct fib_result res;
115 + unsigned char prefixlen;
116 + unsigned char scope;
120 @@ -189,31 +195,35 @@
122 *spec_dst = FIB_RES_PREFSRC(res);
123 fib_combine_itag(itag, &res);
124 -#ifdef CONFIG_IP_ROUTE_MULTIPATH
125 - if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
127 if (FIB_RES_DEV(res) == dev)
130 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
134 + table = FIB_RES_TABLE(&res);
135 + prefixlen = res.prefixlen;
142 fl.oif = dev->ifindex;
145 if (fib_lookup(&fl, &res) == 0) {
146 - if (res.type == RTN_UNICAST) {
147 + if (res.type == RTN_UNICAST &&
148 + ((table == FIB_RES_TABLE(&res) &&
149 + res.prefixlen >= prefixlen && res.scope >= scope) ||
151 *spec_dst = FIB_RES_PREFSRC(res);
152 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
167 -#ifdef CONFIG_IP_ROUTE_MULTIPATH
168 fib_sync_up(ifa->ifa_dev->dev);
176 } endfor_ifa(in_dev);
177 -#ifdef CONFIG_IP_ROUTE_MULTIPATH
183 diff -ur v2.6.14/linux/net/ipv4/fib_hash.c linux/net/ipv4/fib_hash.c
184 --- v2.6.14/linux/net/ipv4/fib_hash.c 2005-10-29 14:15:09.000000000 +0300
185 +++ linux/net/ipv4/fib_hash.c 2005-10-29 18:11:21.301520376 +0300
186 @@ -276,30 +276,38 @@
190 -static int fn_hash_last_dflt=-1;
193 fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
195 - int order, last_idx;
196 + int order, last_idx, last_dflt, last_nhsel;
197 + struct fib_alias *first_fa = NULL;
198 + struct hlist_head *head;
199 struct hlist_node *node;
201 struct fib_info *fi = NULL;
202 struct fib_info *last_resort;
203 struct fn_hash *t = (struct fn_hash*)tb->tb_data;
204 - struct fn_zone *fz = t->fn_zones[0];
205 + struct fn_zone *fz = t->fn_zones[res->prefixlen];
211 + k = fz_key(flp->fl4_dst, fz);
218 read_lock(&fib_hash_lock);
219 - hlist_for_each_entry(f, node, &fz->fz_hash[0], fn_hash) {
220 + head = &fz->fz_hash[fn_hash(k, fz)];
221 + hlist_for_each_entry(f, node, head, fn_hash) {
222 struct fib_alias *fa;
224 + if (f->fn_key != k)
227 list_for_each_entry(fa, &f->fn_alias, fa_list) {
228 struct fib_info *next_fi = fa->fa_info;
230 @@ -307,41 +315,52 @@
231 fa->fa_type != RTN_UNICAST)
235 + fa->fa_tos != flp->fl4_tos)
237 if (next_fi->fib_priority > res->fi->fib_priority)
239 - if (!next_fi->fib_nh[0].nh_gw ||
240 - next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
242 fa->fa_state |= FA_S_ACCESSED;
245 - if (next_fi != res->fi)
247 - } else if (!fib_detect_death(fi, order, &last_resort,
248 - &last_idx, &fn_hash_last_dflt)) {
250 + last_dflt = fa->fa_last_dflt;
253 + if (fi && !fib_detect_death(fi, order, &last_resort,
254 + &last_idx, &last_dflt, &last_nhsel, flp)) {
256 fib_info_put(res->fi);
258 atomic_inc(&fi->fib_clntref);
259 - fn_hash_last_dflt = order;
260 + first_fa->fa_last_dflt = order;
269 if (order <= 0 || fi == NULL) {
270 - fn_hash_last_dflt = -1;
271 + if (fi && fi->fib_nhs > 1 &&
272 + fib_detect_death(fi, order, &last_resort, &last_idx,
273 + &last_dflt, &last_nhsel, flp) &&
274 + last_resort == fi) {
275 + read_lock_bh(&fib_nhflags_lock);
276 + fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
277 + read_unlock_bh(&fib_nhflags_lock);
279 + if (first_fa) first_fa->fa_last_dflt = -1;
283 - if (!fib_detect_death(fi, order, &last_resort, &last_idx, &fn_hash_last_dflt)) {
284 + if (!fib_detect_death(fi, order, &last_resort, &last_idx,
285 + &last_dflt, &last_nhsel, flp)) {
287 fib_info_put(res->fi);
289 atomic_inc(&fi->fib_clntref);
290 - fn_hash_last_dflt = order;
291 + first_fa->fa_last_dflt = order;
296 res->fi = last_resort;
298 atomic_inc(&last_resort->fib_clntref);
299 + read_lock_bh(&fib_nhflags_lock);
300 + last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
301 + read_unlock_bh(&fib_nhflags_lock);
302 + first_fa->fa_last_dflt = last_idx;
304 - fn_hash_last_dflt = last_idx;
306 read_unlock(&fib_hash_lock);
309 write_lock_bh(&fib_hash_lock);
310 fi_drop = fa->fa_info;
312 + fa->fa_last_dflt = -1;
314 fa->fa_scope = r->rtm_scope;
315 state = fa->fa_state;
317 new_fa->fa_type = type;
318 new_fa->fa_scope = r->rtm_scope;
319 new_fa->fa_state = 0;
320 + new_fa->fa_last_dflt = -1;
323 * Insert new entry to the list.
324 diff -ur v2.6.14/linux/net/ipv4/fib_lookup.h linux/net/ipv4/fib_lookup.h
325 --- v2.6.14/linux/net/ipv4/fib_lookup.h 2005-10-29 14:15:09.000000000 +0300
326 +++ linux/net/ipv4/fib_lookup.h 2005-10-29 18:11:21.302520224 +0300
328 struct list_head fa_list;
330 struct fib_info *fa_info;
337 extern int fib_detect_death(struct fib_info *fi, int order,
338 struct fib_info **last_resort,
339 - int *last_idx, int *dflt);
340 + int *last_idx, int *dflt, int *last_nhsel,
341 + const struct flowi *flp);
343 #endif /* _FIB_LOOKUP_H */
344 diff -ur v2.6.14/linux/net/ipv4/fib_rules.c linux/net/ipv4/fib_rules.c
345 --- v2.6.14/linux/net/ipv4/fib_rules.c 2005-08-29 07:51:29.000000000 +0300
346 +++ linux/net/ipv4/fib_rules.c 2005-10-29 18:11:21.302520224 +0300
351 +int fib_result_table(struct fib_result *res)
353 + return res->r->r_table;
356 int fib_lookup(const struct flowi *flp, struct fib_result *res)
360 void fib_select_default(const struct flowi *flp, struct fib_result *res)
362 if (res->r && res->r->r_action == RTN_UNICAST &&
363 - FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
364 + ((FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) ||
365 + FIB_RES_NH(*res).nh_scope == RT_SCOPE_HOST)) {
366 struct fib_table *tb;
367 if ((tb = fib_get_table(res->r->r_table)) != NULL)
368 tb->tb_select_default(tb, flp, res);
369 diff -ur v2.6.14/linux/net/ipv4/fib_semantics.c linux/net/ipv4/fib_semantics.c
370 --- v2.6.14/linux/net/ipv4/fib_semantics.c 2005-10-29 14:15:09.000000000 +0300
371 +++ linux/net/ipv4/fib_semantics.c 2005-10-29 18:11:32.886759152 +0300
373 static struct hlist_head *fib_info_laddrhash;
374 static unsigned int fib_hash_size;
375 static unsigned int fib_info_cnt;
376 +rwlock_t fib_nhflags_lock = RW_LOCK_UNLOCKED;
378 #define DEVINDEX_HASHBITS 8
379 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
381 #ifdef CONFIG_NET_CLS_ROUTE
382 nh->nh_tclassid != onh->nh_tclassid ||
384 - ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
385 + ((nh->nh_flags^onh->nh_flags)&~RTNH_F_BADSTATE))
388 } endfor_nexthops(fi);
390 nfi->fib_priority == fi->fib_priority &&
391 memcmp(nfi->fib_metrics, fi->fib_metrics,
392 sizeof(fi->fib_metrics)) == 0 &&
393 - ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
394 + ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_BADSTATE) == 0 &&
395 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
398 @@ -317,26 +318,70 @@
401 int fib_detect_death(struct fib_info *fi, int order,
402 - struct fib_info **last_resort, int *last_idx, int *dflt)
403 + struct fib_info **last_resort, int *last_idx, int *dflt,
404 + int *last_nhsel, const struct flowi *flp)
407 - int state = NUD_NONE;
410 + struct fib_nh * nh;
412 + int flag, dead = 1;
414 + /* change_nexthops(fi) { */
415 + for (nhsel = 0, nh = fi->fib_nh; nhsel < fi->fib_nhs; nh++, nhsel++) {
416 + if (flp->oif && flp->oif != nh->nh_oif)
418 + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && nh->nh_gw &&
419 + nh->nh_scope == RT_SCOPE_LINK)
421 + if (nh->nh_flags & RTNH_F_DEAD)
424 - n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
426 - state = n->nud_state;
429 - if (state==NUD_REACHABLE)
431 - if ((state&NUD_VALID) && order != *dflt)
433 - if ((state&NUD_VALID) ||
434 - (*last_idx<0 && order > *dflt)) {
438 + if (nh->nh_dev->flags & IFF_NOARP) {
444 + if (!nh->nh_gw || nh->nh_scope != RT_SCOPE_LINK)
445 + dst = flp->fl4_dst;
448 + n = neigh_lookup(&arp_tbl, &dst, nh->nh_dev);
450 + state = n->nud_state;
453 + if (state==NUD_REACHABLE ||
454 + ((state&NUD_VALID) && order != *dflt)) {
458 + if (!(state&NUD_VALID))
462 + if ((state&NUD_VALID) ||
463 + (*last_idx<0 && order >= *dflt)) {
466 + *last_nhsel = nhsel;
471 + read_lock_bh(&fib_nhflags_lock);
473 + nh->nh_flags |= RTNH_F_SUSPECT;
475 + nh->nh_flags &= ~RTNH_F_SUSPECT;
476 + read_unlock_bh(&fib_nhflags_lock);
479 + /* } endfor_nexthops(fi) */
484 #ifdef CONFIG_IP_ROUTE_MULTIPATH
487 if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
489 - if (!(dev->flags&IFF_UP))
491 + if (!(dev->flags&IFF_UP)) {
492 + if (fi->fib_protocol != RTPROT_STATIC)
494 + nh->nh_flags |= RTNH_F_DEAD;
498 nh->nh_scope = RT_SCOPE_LINK;
499 @@ -523,24 +571,48 @@
500 /* It is not necessary, but requires a bit of thinking */
501 if (fl.fl4_scope < RT_SCOPE_LINK)
502 fl.fl4_scope = RT_SCOPE_LINK;
503 - if ((err = fib_lookup(&fl, &res)) != 0)
505 + err = fib_lookup(&fl, &res);
508 - if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
510 - nh->nh_scope = res.scope;
511 - nh->nh_oif = FIB_RES_OIF(res);
512 - if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
514 - dev_hold(nh->nh_dev);
516 - if (!(nh->nh_dev->flags & IFF_UP))
520 + struct in_device *in_dev;
522 + if (err != -ENETUNREACH ||
523 + fi->fib_protocol != RTPROT_STATIC)
526 + in_dev = inetdev_by_index(nh->nh_oif);
527 + if (in_dev == NULL ||
528 + in_dev->dev->flags & IFF_UP) {
530 + in_dev_put(in_dev);
533 + nh->nh_flags |= RTNH_F_DEAD;
534 + nh->nh_scope = RT_SCOPE_LINK;
535 + nh->nh_dev = in_dev->dev;
536 + dev_hold(nh->nh_dev);
537 + in_dev_put(in_dev);
540 + if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
542 + nh->nh_scope = res.scope;
543 + nh->nh_oif = FIB_RES_OIF(res);
544 + if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
546 + dev_hold(nh->nh_dev);
547 + if (!(nh->nh_dev->flags & IFF_UP)) {
548 + if (fi->fib_protocol != RTPROT_STATIC) {
552 + nh->nh_flags |= RTNH_F_DEAD;
562 struct in_device *in_dev;
567 if (!(in_dev->dev->flags&IFF_UP)) {
568 - in_dev_put(in_dev);
570 + if (fi->fib_protocol != RTPROT_STATIC) {
571 + in_dev_put(in_dev);
574 + nh->nh_flags |= RTNH_F_DEAD;
576 nh->nh_dev = in_dev->dev;
577 dev_hold(nh->nh_dev);
580 if (nh->nh_flags&RTNH_F_DEAD)
582 - if (!flp->oif || flp->oif == nh->nh_oif)
584 + if (flp->oif && flp->oif != nh->nh_oif)
586 + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
587 + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
591 #ifdef CONFIG_IP_ROUTE_MULTIPATH
592 if (nhsel < fi->fib_nhs) {
593 @@ -1197,18 +1276,29 @@
596 change_nexthops(fi) {
597 - if (nh->nh_flags&RTNH_F_DEAD)
599 - else if (nh->nh_dev == dev &&
600 - nh->nh_scope != scope) {
601 - nh->nh_flags |= RTNH_F_DEAD;
602 + if (nh->nh_flags&RTNH_F_DEAD) {
603 + if (fi->fib_protocol!=RTPROT_STATIC ||
604 + nh->nh_dev == NULL ||
605 + __in_dev_get_rtnl(nh->nh_dev) == NULL ||
606 + nh->nh_dev->flags&IFF_UP)
608 + } else if (nh->nh_dev == dev &&
609 + nh->nh_scope != scope) {
610 + write_lock_bh(&fib_nhflags_lock);
611 #ifdef CONFIG_IP_ROUTE_MULTIPATH
612 - spin_lock_bh(&fib_multipath_lock);
613 + spin_lock(&fib_multipath_lock);
614 + nh->nh_flags |= RTNH_F_DEAD;
615 fi->fib_power -= nh->nh_power;
617 - spin_unlock_bh(&fib_multipath_lock);
618 + spin_unlock(&fib_multipath_lock);
620 + nh->nh_flags |= RTNH_F_DEAD;
623 + write_unlock_bh(&fib_nhflags_lock);
624 + if (fi->fib_protocol!=RTPROT_STATIC ||
626 + __in_dev_get_rtnl(dev) == NULL)
629 #ifdef CONFIG_IP_ROUTE_MULTIPATH
630 if (force > 1 && nh->nh_dev == dev) {
631 @@ -1227,11 +1317,8 @@
635 -#ifdef CONFIG_IP_ROUTE_MULTIPATH
638 - Dead device goes up. We wake up dead nexthops.
639 - It takes sense only on multipath routes.
640 + Dead device goes up or new address is added. We wake up dead nexthops.
643 int fib_sync_up(struct net_device *dev)
644 @@ -1241,8 +1328,10 @@
645 struct hlist_head *head;
646 struct hlist_node *node;
649 + struct fib_result res;
653 if (!(dev->flags&IFF_UP))
656 @@ -1250,6 +1339,7 @@
657 hash = fib_devindex_hashfn(dev->ifindex);
658 head = &fib_info_devhash[hash];
662 hlist_for_each_entry(nh, node, head, nh_hash) {
663 struct fib_info *fi = nh->nh_parent;
664 @@ -1262,19 +1352,37 @@
667 change_nexthops(fi) {
668 - if (!(nh->nh_flags&RTNH_F_DEAD)) {
670 + if (!(nh->nh_flags&RTNH_F_DEAD))
673 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
675 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
677 + if (nh->nh_gw && fi->fib_protocol == RTPROT_STATIC) {
678 + struct flowi fl = {
680 + { .daddr = nh->nh_gw,
681 + .scope = nh->nh_scope } },
684 + if (fib_lookup(&fl, &res) != 0)
686 + if (res.type != RTN_UNICAST &&
687 + res.type != RTN_LOCAL) {
691 + nh->nh_scope = res.scope;
696 +#ifdef CONFIG_IP_ROUTE_MULTIPATH
697 spin_lock_bh(&fib_multipath_lock);
699 nh->nh_flags &= ~RTNH_F_DEAD;
700 spin_unlock_bh(&fib_multipath_lock);
702 } endfor_nexthops(fi)
705 @@ -1282,10 +1390,14 @@
715 +#ifdef CONFIG_IP_ROUTE_MULTIPATH
718 The algorithm is suboptimal, but it provides really
719 fair weighted route distribution.
720 @@ -1294,24 +1406,45 @@
721 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
723 struct fib_info *fi = res->fi;
727 spin_lock_bh(&fib_multipath_lock);
731 + change_nexthops(fi) {
732 + if (flp->oif != nh->nh_oif)
734 + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
735 + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
737 + if (!(nh->nh_flags&RTNH_F_BADSTATE)) {
738 + if (nh->nh_power > w) {
743 + } endfor_nexthops(fi);
745 + spin_unlock_bh(&fib_multipath_lock);
753 if (fi->fib_power <= 0) {
755 change_nexthops(fi) {
756 - if (!(nh->nh_flags&RTNH_F_DEAD)) {
757 + if (!(nh->nh_flags&RTNH_F_BADSTATE)) {
758 power += nh->nh_weight;
759 nh->nh_power = nh->nh_weight;
761 } endfor_nexthops(fi);
762 fi->fib_power = power;
764 - spin_unlock_bh(&fib_multipath_lock);
765 - /* Race condition: route has just become dead. */
774 @@ -1321,20 +1454,40 @@
776 w = jiffies % fi->fib_power;
779 change_nexthops(fi) {
780 - if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
781 + if (!(nh->nh_flags&RTNH_F_BADSTATE) && nh->nh_power) {
782 if ((w -= nh->nh_power) <= 0) {
785 - res->nh_sel = nhsel;
786 spin_unlock_bh(&fib_multipath_lock);
787 + res->nh_sel = nhsel;
792 + } endfor_nexthops(fi);
801 + if (!(nh->nh_flags&RTNH_F_DEAD)) {
802 + if (flp->oif && flp->oif != nh->nh_oif)
804 + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
805 + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
807 + spin_unlock_bh(&fib_multipath_lock);
808 + res->nh_sel = nhsel;
811 } endfor_nexthops(fi);
813 /* Race condition: route has just become dead. */
815 spin_unlock_bh(&fib_multipath_lock);
818 diff -ur v2.6.14/linux/net/ipv4/netfilter/ip_nat_core.c linux/net/ipv4/netfilter/ip_nat_core.c
819 --- v2.6.14/linux/net/ipv4/netfilter/ip_nat_core.c 2005-10-29 14:15:09.000000000 +0300
820 +++ linux/net/ipv4/netfilter/ip_nat_core.c 2005-10-29 18:11:32.887759000 +0300
822 EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr);
826 +ip_nat_route_input(unsigned int hooknum,
827 + struct sk_buff **pskb,
828 + const struct net_device *in,
829 + const struct net_device *out,
830 + int (*okfn)(struct sk_buff *))
832 + struct sk_buff *skb = *pskb;
834 + struct ip_conntrack *conn;
835 + enum ip_conntrack_info ctinfo;
836 + enum ip_conntrack_dir dir;
837 + unsigned long statusbit;
840 + if (!(conn = ip_conntrack_get(skb, &ctinfo)))
843 + if (!(conn->status & IPS_NAT_DONE_MASK))
845 + dir = CTINFO2DIR(ctinfo);
846 + statusbit = IPS_SRC_NAT;
847 + if (dir == IP_CT_DIR_REPLY)
848 + statusbit ^= IPS_NAT_MASK;
849 + if (!(conn->status & statusbit))
855 + if (skb->len < sizeof(struct iphdr))
858 + /* use daddr in other direction as masquerade address (lsrc) */
860 + saddr = conn->tuplehash[!dir].tuple.dst.ip;
861 + if (saddr == iph->saddr)
864 + if (ip_route_input_lookup(skb, iph->daddr, iph->saddr, iph->tos,
870 +EXPORT_SYMBOL_GPL(ip_nat_route_input);
872 static int __init ip_nat_init(void)
875 diff -ur v2.6.14/linux/net/ipv4/netfilter/ip_nat_standalone.c linux/net/ipv4/netfilter/ip_nat_standalone.c
876 --- v2.6.14/linux/net/ipv4/netfilter/ip_nat_standalone.c 2005-10-29 14:15:09.000000000 +0300
877 +++ linux/net/ipv4/netfilter/ip_nat_standalone.c 2005-10-29 18:11:32.887759000 +0300
879 .priority = NF_IP_PRI_NAT_DST,
882 +/* Before routing, route before mangling */
883 +static struct nf_hook_ops ip_nat_inr_ops = {
884 + .hook = ip_nat_route_input,
886 + .hooknum = NF_IP_PRE_ROUTING,
887 + .priority = NF_IP_PRI_LAST-1,
890 /* After packet filtering, change source */
891 static struct nf_hook_ops ip_nat_out_ops = {
893 @@ -330,10 +338,15 @@
894 printk("ip_nat_init: can't register in hook.\n");
895 goto cleanup_rule_init;
897 + ret = nf_register_hook(&ip_nat_inr_ops);
899 + printk("ip_nat_init: can't register inr hook.\n");
900 + goto cleanup_inops;
902 ret = nf_register_hook(&ip_nat_out_ops);
904 printk("ip_nat_init: can't register out hook.\n");
905 - goto cleanup_inops;
906 + goto cleanup_inrops;
908 ret = nf_register_hook(&ip_nat_adjust_in_ops);
911 nf_unregister_hook(&ip_nat_adjust_in_ops);
913 nf_unregister_hook(&ip_nat_out_ops);
915 + nf_unregister_hook(&ip_nat_inr_ops);
917 nf_unregister_hook(&ip_nat_in_ops);
919 diff -ur v2.6.14/linux/net/ipv4/netfilter/ipt_MASQUERADE.c linux/net/ipv4/netfilter/ipt_MASQUERADE.c
920 --- v2.6.14/linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2005-10-29 14:15:09.000000000 +0300
921 +++ linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2005-10-29 18:11:32.887759000 +0300
926 - rt = (struct rtable *)(*pskb)->dst;
927 - newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
929 - printk("MASQUERADE: %s ate my IP address\n", out->name);
933 + struct flowi fl = { .nl_u = { .ip4_u =
934 + { .daddr = (*pskb)->nh.iph->daddr,
935 + .tos = (RT_TOS((*pskb)->nh.iph->tos) |
937 + .gw = ((struct rtable *) (*pskb)->dst)->rt_gateway,
938 +#ifdef CONFIG_IP_ROUTE_FWMARK
939 + .fwmark = (*pskb)->nfmark
942 + .oif = out->ifindex };
943 + if (ip_route_output_key(&rt, &fl) != 0) {
944 + /* Funky routing can do this. */
945 + if (net_ratelimit())
946 + printk("MASQUERADE:"
947 + " No route: Rusty's brain broke!\n");
952 + newsrc = rt->rt_src;
953 + DEBUGP("newsrc = %u.%u.%u.%u\n", NIPQUAD(newsrc));
956 write_lock_bh(&masq_lock);
957 ct->nat.masq_index = out->ifindex;
958 write_unlock_bh(&masq_lock);
959 diff -ur v2.6.14/linux/net/ipv4/route.c linux/net/ipv4/route.c
960 --- v2.6.14/linux/net/ipv4/route.c 2005-10-29 14:15:09.000000000 +0300
961 +++ linux/net/ipv4/route.c 2005-10-29 18:11:32.889758696 +0300
962 @@ -1197,6 +1197,7 @@
964 /* Gateway is different ... */
965 rt->rt_gateway = new_gw;
966 + if (rt->fl.fl4_gw) rt->fl.fl4_gw = new_gw;
968 /* Redirect received -> path was valid */
969 dst_confirm(&rth->u.dst);
970 @@ -1632,6 +1633,7 @@
971 rth->fl.fl4_fwmark= skb->nfmark;
973 rth->fl.fl4_src = saddr;
974 + rth->fl.fl4_lsrc = 0;
976 #ifdef CONFIG_NET_CLS_ROUTE
977 rth->u.dst.tclassid = itag;
978 @@ -1642,6 +1644,7 @@
979 dev_hold(rth->u.dst.dev);
980 rth->idev = in_dev_get(rth->u.dst.dev);
982 + rth->fl.fl4_gw = 0;
983 rth->rt_gateway = daddr;
984 rth->rt_spec_dst= spec_dst;
985 rth->rt_type = RTN_MULTICAST;
986 @@ -1706,7 +1709,7 @@
987 struct fib_result* res,
988 struct in_device *in_dev,
989 u32 daddr, u32 saddr, u32 tos,
990 - struct rtable **result)
991 + u32 lsrc, struct rtable **result)
995 @@ -1739,6 +1742,7 @@
996 flags |= RTCF_DIRECTSRC;
998 if (out_dev == in_dev && err && !(flags & (RTCF_NAT | RTCF_MASQ)) &&
1000 (IN_DEV_SHARED_MEDIA(out_dev) ||
1001 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
1002 flags |= RTCF_DOREDIRECT;
1003 @@ -1778,6 +1782,7 @@
1005 rth->fl.fl4_src = saddr;
1006 rth->rt_src = saddr;
1007 + rth->fl.fl4_lsrc = lsrc;
1008 rth->rt_gateway = daddr;
1010 rth->fl.iif = in_dev->dev->ifindex;
1011 @@ -1785,6 +1790,7 @@
1012 dev_hold(rth->u.dst.dev);
1013 rth->idev = in_dev_get(rth->u.dst.dev);
1015 + rth->fl.fl4_gw = 0;
1016 rth->rt_spec_dst= spec_dst;
1018 rth->u.dst.input = ip_forward;
1019 @@ -1806,19 +1812,20 @@
1020 struct fib_result* res,
1021 const struct flowi *fl,
1022 struct in_device *in_dev,
1023 - u32 daddr, u32 saddr, u32 tos)
1024 + u32 daddr, u32 saddr, u32 tos, u32 lsrc)
1026 struct rtable* rth = NULL;
1030 + fib_select_default(fl, res);
1031 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1032 - if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0)
1033 + if (res->fi && res->fi->fib_nhs > 1)
1034 fib_select_multipath(fl, res);
1037 /* create a routing cache entry */
1038 - err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth);
1039 + err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, lsrc, &rth);
1043 @@ -1831,7 +1838,7 @@
1044 struct fib_result* res,
1045 const struct flowi *fl,
1046 struct in_device *in_dev,
1047 - u32 daddr, u32 saddr, u32 tos)
1048 + u32 daddr, u32 saddr, u32 tos, u32 lsrc)
1050 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
1051 struct rtable* rth = NULL, *rtres;
1052 @@ -1847,7 +1854,7 @@
1053 /* distinguish between multipath and singlepath */
1055 return ip_mkroute_input_def(skb, res, fl, in_dev, daddr,
1059 /* add all alternatives to the routing cache */
1060 for (hop = 0; hop < hopcount; hop++) {
1061 @@ -1859,7 +1866,7 @@
1063 /* create a routing cache entry */
1064 err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos,
1070 @@ -1879,7 +1886,7 @@
1071 skb->dst = &rtres->u.dst;
1073 #else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
1074 - return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos);
1075 + return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos, lsrc);
1076 #endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
1079 @@ -1895,20 +1902,20 @@
1082 static int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr,
1083 - u8 tos, struct net_device *dev)
1084 + u8 tos, struct net_device *dev, u32 lsrc)
1086 struct fib_result res;
1087 struct in_device *in_dev = in_dev_get(dev);
1088 struct flowi fl = { .nl_u = { .ip4_u =
1091 + .saddr = lsrc? : saddr,
1093 .scope = RT_SCOPE_UNIVERSE,
1094 #ifdef CONFIG_IP_ROUTE_FWMARK
1095 .fwmark = skb->nfmark
1098 - .iif = dev->ifindex };
1099 + .iif = lsrc? loopback_dev.ifindex : dev->ifindex };
1102 struct rtable * rth;
1103 @@ -1941,6 +1948,12 @@
1104 if (BADCLASS(daddr) || ZERONET(daddr) || LOOPBACK(daddr))
1105 goto martian_destination;
1108 + if (MULTICAST(lsrc) || BADCLASS(lsrc) ||
1109 + ZERONET(lsrc) || LOOPBACK(lsrc))
1114 * Now we are ready to route packet.
1116 @@ -1950,6 +1963,10 @@
1120 + if (lsrc && res.type != RTN_UNICAST && res.type != RTN_NAT)
1122 + fl.iif = dev->ifindex;
1123 + fl.fl4_src = saddr;
1125 RT_CACHE_STAT_INC(in_slow_tot);
1127 @@ -1974,7 +1991,7 @@
1128 if (res.type != RTN_UNICAST)
1129 goto martian_destination;
1131 - err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
1132 + err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos, lsrc);
1133 if (err == -ENOBUFS)
1136 @@ -1989,6 +2006,8 @@
1138 if (skb->protocol != htons(ETH_P_IP))
1144 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
1145 @@ -2031,6 +2050,7 @@
1146 rth->u.dst.dev = &loopback_dev;
1147 dev_hold(rth->u.dst.dev);
1148 rth->idev = in_dev_get(rth->u.dst.dev);
1149 + rth->fl.fl4_gw = 0;
1150 rth->rt_gateway = daddr;
1151 rth->rt_spec_dst= spec_dst;
1152 rth->u.dst.input= ip_local_deliver;
1153 @@ -2080,8 +2100,9 @@
1157 -int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr,
1158 - u8 tos, struct net_device *dev)
1160 +ip_route_input_cached(struct sk_buff *skb, u32 daddr, u32 saddr,
1161 + u8 tos, struct net_device *dev, u32 lsrc)
1163 struct rtable * rth;
1165 @@ -2096,6 +2117,7 @@
1166 if (rth->fl.fl4_dst == daddr &&
1167 rth->fl.fl4_src == saddr &&
1168 rth->fl.iif == iif &&
1169 + rth->fl.fl4_lsrc == lsrc &&
1171 #ifdef CONFIG_IP_ROUTE_FWMARK
1172 rth->fl.fl4_fwmark == skb->nfmark &&
1173 @@ -2144,7 +2166,19 @@
1177 - return ip_route_input_slow(skb, daddr, saddr, tos, dev);
1178 + return ip_route_input_slow(skb, daddr, saddr, tos, dev, lsrc);
1181 +int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr,
1182 + u8 tos, struct net_device *dev)
1184 + return ip_route_input_cached(skb, daddr, saddr, tos, dev, 0);
1187 +int ip_route_input_lookup(struct sk_buff *skb, u32 daddr, u32 saddr,
1188 + u8 tos, struct net_device *dev, u32 lsrc)
1190 + return ip_route_input_cached(skb, daddr, saddr, tos, dev, lsrc);
1193 static inline int __mkroute_output(struct rtable **result,
1194 @@ -2223,6 +2257,7 @@
1195 rth->fl.fl4_tos = tos;
1196 rth->fl.fl4_src = oldflp->fl4_src;
1197 rth->fl.oif = oldflp->oif;
1198 + rth->fl.fl4_gw = oldflp->fl4_gw;
1199 #ifdef CONFIG_IP_ROUTE_FWMARK
1200 rth->fl.fl4_fwmark= oldflp->fl4_fwmark;
1202 @@ -2370,6 +2405,7 @@
1203 struct flowi fl = { .nl_u = { .ip4_u =
1204 { .daddr = oldflp->fl4_dst,
1205 .saddr = oldflp->fl4_src,
1206 + .gw = oldflp->fl4_gw,
1207 .tos = tos & IPTOS_RT_MASK,
1208 .scope = ((tos & RTO_ONLINK) ?
1210 @@ -2475,6 +2511,7 @@
1211 dev_out = &loopback_dev;
1213 fl.oif = loopback_dev.ifindex;
1215 res.type = RTN_LOCAL;
1216 flags |= RTCF_LOCAL;
1218 @@ -2482,7 +2519,7 @@
1220 if (fib_lookup(&fl, &res)) {
1222 - if (oldflp->oif) {
1223 + if (oldflp->oif && dev_out->flags & IFF_UP) {
1224 /* Apparently, routing tables are wrong. Assume,
1225 that the destination is on link.
1227 @@ -2522,6 +2559,7 @@
1228 dev_out = &loopback_dev;
1230 fl.oif = dev_out->ifindex;
1233 fib_info_put(res.fi);
1235 @@ -2529,13 +2567,12 @@
1239 + if (res.type == RTN_UNICAST)
1240 + fib_select_default(&fl, &res);
1241 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1242 - if (res.fi->fib_nhs > 1 && fl.oif == 0)
1243 + if (res.fi->fib_nhs > 1)
1244 fib_select_multipath(&fl, &res);
1247 - if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
1248 - fib_select_default(&fl, &res);
1251 fl.fl4_src = FIB_RES_PREFSRC(res);
1252 @@ -2572,6 +2609,7 @@
1253 rth->fl.fl4_src == flp->fl4_src &&
1255 rth->fl.oif == flp->oif &&
1256 + rth->fl.fl4_gw == flp->fl4_gw &&
1257 #ifdef CONFIG_IP_ROUTE_FWMARK
1258 rth->fl.fl4_fwmark == flp->fl4_fwmark &&
1260 @@ -3211,3 +3249,4 @@
1261 EXPORT_SYMBOL(__ip_select_ident);
1262 EXPORT_SYMBOL(ip_route_input);
1263 EXPORT_SYMBOL(ip_route_output_key);
1264 +EXPORT_SYMBOL(ip_route_input_lookup);