root/net/ipv4/route.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. rt_logmask
  2. rt_mask
  3. fz_hash_code
  4. fib_free_node
  5. fib_lookup_gateway
  6. fib_lookup_local
  7. fib_lookup
  8. get_gw_dev
  9. default_mask
  10. guess_mask
  11. bad_mask
  12. fib_del_list
  13. fib_del_1
  14. fib_create_info
  15. fib_add_1
  16. rt_flush_list
  17. fib_flush_1
  18. rt_get_info
  19. rt_cache_get_info
  20. rt_free
  21. rt_kick_free_queue
  22. ip_rt_run_bh
  23. ip_rt_check_expire
  24. rt_redirect_1
  25. rt_cache_flush
  26. rt_garbage_collect_1
  27. rt_req_enqueue
  28. rt_req_dequeue
  29. rt_kick_backlog
  30. rt_del
  31. rt_add
  32. ip_rt_flush
  33. ip_rt_redirect
  34. rt_garbage_collect
  35. rt_cache_add
  36. ip_rt_slow_route
  37. ip_rt_put
  38. ip_rt_route
  39. ip_rt_new
  40. ip_rt_kill
  41. ip_rt_ioctl
  42. ip_rt_advice

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              ROUTE - implementation of the IP router.
   7  *
   8  * Version:     @(#)route.c     1.0.14  05/31/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  13  *              Linus Torvalds, <Linus.Torvalds@helsinki.fi>
  14  *
  15  * Fixes:
  16  *              Alan Cox        :       Verify area fixes.
  17  *              Alan Cox        :       cli() protects routing changes
  18  *              Rui Oliveira    :       ICMP routing table updates
  19  *              (rco@di.uminho.pt)      Routing table insertion and update
  20  *              Linus Torvalds  :       Rewrote bits to be sensible
  21  *              Alan Cox        :       Added BSD route gw semantics
  22  *              Alan Cox        :       Super /proc >4K 
  23  *              Alan Cox        :       MTU in route table
  24  *              Alan Cox        :       MSS actually. Also added the window
  25  *                                      clamper.
  26  *              Sam Lantinga    :       Fixed route matching in rt_del()
  27  *              Alan Cox        :       Routing cache support.
  28  *              Alan Cox        :       Removed compatibility cruft.
  29  *              Alan Cox        :       RTF_REJECT support.
  30  *              Alan Cox        :       TCP irtt support.
  31  *              Jonathan Naylor :       Added Metric support.
  32  *      Miquel van Smoorenburg  :       BSD API fixes.
  33  *      Miquel van Smoorenburg  :       Metrics.
  34  *              Alan Cox        :       Use __u32 properly
  35  *              Alan Cox        :       Aligned routing errors more closely with BSD
  36  *                                      our system is still very different.
  37  *              Alan Cox        :       Faster /proc handling
  38  *      Alexey Kuznetsov        :       Massive rework to support tree based routing,
  39  *                                      routing caches and better behaviour.
  40  *              
  41  *              Olaf Erb        :       irtt wasn't being copied right.
  42  *              Bjorn Ekwall    :       Kerneld route support.
  43  *              Alan Cox        :       Multicast fixed (I hope)
  44  *              Pavel Krauz     :       Limited broadcast fixed
  45  *
  46  *              This program is free software; you can redistribute it and/or
  47  *              modify it under the terms of the GNU General Public License
  48  *              as published by the Free Software Foundation; either version
  49  *              2 of the License, or (at your option) any later version.
  50  */
  51 
  52 #include <linux/config.h>
  53 #include <asm/segment.h>
  54 #include <asm/system.h>
  55 #include <asm/bitops.h>
  56 #include <linux/types.h>
  57 #include <linux/kernel.h>
  58 #include <linux/sched.h>
  59 #include <linux/mm.h>
  60 #include <linux/string.h>
  61 #include <linux/socket.h>
  62 #include <linux/sockios.h>
  63 #include <linux/errno.h>
  64 #include <linux/in.h>
  65 #include <linux/inet.h>
  66 #include <linux/netdevice.h>
  67 #include <linux/if_arp.h>
  68 #include <net/ip.h>
  69 #include <net/protocol.h>
  70 #include <net/route.h>
  71 #include <net/tcp.h>
  72 #include <linux/skbuff.h>
  73 #include <net/sock.h>
  74 #include <net/icmp.h>
  75 #include <net/netlink.h>
  76 #ifdef CONFIG_KERNELD
  77 #include <linux/kerneld.h>
  78 #endif
  79 
  80 /*
  81  * Forwarding Information Base definitions.
  82  */
  83 
/*
 * One entry in the Forwarding Information Base: a destination plus a
 * pointer to the shared gateway/device record.  Entries live either in
 * a zone's plain list or in its hash table, chained by fib_next and
 * kept sorted by metric (see fib_add_1).
 */
struct fib_node
{
        struct fib_node         *fib_next;      /* next entry in zone list / hash chain */
        __u32                   fib_dst;        /* destination address (network byte order) */
        unsigned long           fib_use;        /* use count, reported via /proc/net/route */
        struct fib_info         *fib_info;      /* shared route data (refcounted) */
        short                   fib_metric;     /* route metric; chains sorted ascending */
        unsigned char           fib_tos;        /* type of service (set to 0 on creation) */
};
  93 
/*
 * This structure contains data shared by many of routes.
 * Records are refcounted and deduplicated by fib_create_info();
 * fib_free_node() drops references.
 */     

struct fib_info
{
        struct fib_info         *fib_next;      /* doubly linked fib_info_list */
        struct fib_info         *fib_prev;
        __u32                   fib_gateway;    /* gateway address; 0 for direct routes */
        struct device           *fib_dev;       /* output device */
        int                     fib_refcnt;     /* number of fib_nodes sharing this record */
        unsigned long           fib_window;     /* window value (0 when not specified) */
        unsigned short          fib_flags;      /* RTF_* flags */
        unsigned short          fib_mtu;        /* MSS/MTU for this route */
        unsigned short          fib_irtt;       /* initial RTT (0 when not specified) */
};
 110 
/*
 * All routes sharing one netmask length form a zone.  A zone starts as
 * a plain list and is converted to a hash table once it holds
 * RTZ_HASHING_LIMIT entries (see fib_add_1).
 */
struct fib_zone
{
        struct fib_zone *fz_next;               /* next zone in fib_zone_list */
        struct fib_node **fz_hash_table;        /* hash buckets, NULL until zone grows */
        struct fib_node *fz_list;               /* plain list used before hashing */
        int             fz_nent;                /* number of entries in this zone */
        int             fz_logmask;             /* host-bit count of fz_mask (rt_logmask) */
        __u32           fz_mask;                /* netmask shared by all entries */
};
 120 
static struct fib_zone  *fib_zones[33];         /* zones indexed by rt_logmask(mask), 0..32 */
static struct fib_zone  *fib_zone_list;         /* the same zones as an ordered list */
static struct fib_node  *fib_loopback = NULL;   /* first route on a loopback device */
static struct fib_info  *fib_info_list;         /* all shared fib_info records */
 125 
/*
 * Backlogging.
 */

/* Bit numbers for ip_rt_bh_mask: which bottom-half work is pending. */
#define RT_BH_REDIRECT          0
#define RT_BH_GARBAGE_COLLECT   1
#define RT_BH_FREE              2

/* A deferred routing request queued for bottom-half processing. */
struct rt_req
{
        struct rt_req * rtr_next;       /* next queued request */
        struct device *dev;             /* device the request refers to */
        __u32 dst;                      /* destination address */
        __u32 gw;                       /* gateway address */
        unsigned char tos;              /* type of service */
};

int                     ip_rt_lock;     /* non-zero while the tables are locked; readers sleep on rt_wait */
unsigned                ip_rt_bh_mask;  /* pending RT_BH_* work bits */
static struct rt_req    *rt_backlog;    /* queue of deferred requests */
 146 
/*
 * Route cache.
 */

struct rtable           *ip_rt_hash_table[RT_HASH_DIVISOR];     /* hashed cache of resolved routes */
static int              rt_cache_size;                          /* current number of cached entries */
static struct rtable    *rt_free_queue;                         /* presumably entries awaiting deferred free — see rt_free/rt_kick_free_queue */
struct wait_queue       *rt_wait;                               /* waiters for ip_rt_lock release */

/* Forward declarations for cache maintenance helpers defined below. */
static void rt_kick_backlog(void);
static void rt_cache_add(unsigned hash, struct rtable * rth);
static void rt_cache_flush(void);
static void rt_garbage_collect_1(void);
 160 
 161 /* 
 162  * Evaluate mask length.
 163  */
 164 
 165 static __inline__ int rt_logmask(__u32 mask)
     /* [previous][next][first][last][top][bottom][index][help] */
 166 {
 167         if (!(mask = ntohl(mask)))
 168                 return 32;
 169         return ffz(~mask);
 170 }
 171 
 172 /* 
 173  * Create mask from length.
 174  */
 175 
 176 static __inline__ __u32 rt_mask(int logmask)
     /* [previous][next][first][last][top][bottom][index][help] */
 177 {
 178         if (logmask >= 32)
 179                 return 0;
 180         return htonl(~((1<<logmask)-1));
 181 }
 182 
 183 static __inline__ unsigned fz_hash_code(__u32 dst, int logmask)
     /* [previous][next][first][last][top][bottom][index][help] */
 184 {
 185         return ip_rt_hash_code(ntohl(dst)>>logmask);
 186 }
 187 
 188 /*
 189  * Free FIB node.
 190  */
 191 
 192 static void fib_free_node(struct fib_node * f)
     /* [previous][next][first][last][top][bottom][index][help] */
 193 {
 194         struct fib_info * fi = f->fib_info;
 195         if (!--fi->fib_refcnt)
 196         {
 197 #if RT_CACHE_DEBUG >= 2
 198                 printk("fib_free_node: fi %08x/%s is free\n", fi->fib_gateway, fi->fib_dev->name);
 199 #endif
 200                 if (fi->fib_next)
 201                         fi->fib_next->fib_prev = fi->fib_prev;
 202                 if (fi->fib_prev)
 203                         fi->fib_prev->fib_next = fi->fib_next;
 204                 if (fi == fib_info_list)
 205                         fib_info_list = fi->fib_next;
 206         }
 207         kfree_s(f, sizeof(struct fib_node));
 208 }
 209 
 210 /*
 211  * Find gateway route by address.
 212  */
 213 
 214 static struct fib_node * fib_lookup_gateway(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 215 {
 216         struct fib_zone * fz;
 217         struct fib_node * f;
 218 
 219         for (fz = fib_zone_list; fz; fz = fz->fz_next) 
 220         {
 221                 if (fz->fz_hash_table)
 222                         f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
 223                 else
 224                         f = fz->fz_list;
 225                 
 226                 for ( ; f; f = f->fib_next)
 227                 {
 228                         if ((dst ^ f->fib_dst) & fz->fz_mask)
 229                                 continue;
 230                         if (f->fib_info->fib_flags & RTF_GATEWAY)
 231                                 return NULL;
 232                         return f;
 233                 }
 234         }
 235         return NULL;
 236 }
 237 
 238 /*
 239  * Find local route by address.
 240  * FIXME: I use "longest match" principle. If destination
 241  *        has some non-local route, I'll not search shorter matches.
 242  *        It's possible, I'm wrong, but I wanted to prevent following
 243  *        situation:
 244  *      route add 193.233.7.128 netmask 255.255.255.192 gw xxxxxx
 245  *      route add 193.233.7.0   netmask 255.255.255.0 eth1
 246  *        (Two ethernets connected by serial line, one is small and other is large)
 247  *        Host 193.233.7.129 is locally unreachable,
 248  *        but old (<=1.3.37) code will send packets destined for it to eth1.
 249  *
 250  */
 251 
 252 static struct fib_node * fib_lookup_local(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 253 {
 254         struct fib_zone * fz;
 255         struct fib_node * f;
 256 
 257         for (fz = fib_zone_list; fz; fz = fz->fz_next) 
 258         {
 259                 int longest_match_found = 0;
 260 
 261                 if (fz->fz_hash_table)
 262                         f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
 263                 else
 264                         f = fz->fz_list;
 265                 
 266                 for ( ; f; f = f->fib_next)
 267                 {
 268                         if ((dst ^ f->fib_dst) & fz->fz_mask)
 269                                 continue;
 270                         if (!(f->fib_info->fib_flags & RTF_GATEWAY))
 271                                 return f;
 272                         longest_match_found = 1;
 273                 }
 274                 if (longest_match_found)
 275                         return NULL;
 276         }
 277         return NULL;
 278 }
 279 
 280 /*
 281  * Main lookup routine.
 282  *      IMPORTANT NOTE: this algorithm has small difference from <=1.3.37 visible
 283  *      by user. It doesn't route non-CIDR broadcasts by default.
 284  *
 285  *      F.e.
 286  *              ifconfig eth0 193.233.7.65 netmask 255.255.255.192 broadcast 193.233.7.255
 287  *      is valid, but if you really are not able (not allowed, do not want) to
 288  *      use CIDR compliant broadcast 193.233.7.127, you should add host route:
 289  *              route add -host 193.233.7.255 eth0
 290  */
 291 
 292 static struct fib_node * fib_lookup(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 293 {
 294         struct fib_zone * fz;
 295         struct fib_node * f;
 296 
 297         for (fz = fib_zone_list; fz; fz = fz->fz_next) 
 298         {
 299                 if (fz->fz_hash_table)
 300                         f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
 301                 else
 302                         f = fz->fz_list;
 303                 
 304                 for ( ; f; f = f->fib_next)
 305                 {
 306                         if ((dst ^ f->fib_dst) & fz->fz_mask)
 307                                 continue;
 308                         return f;
 309                 }
 310         }
 311         return NULL;
 312 }
 313 
 314 static __inline__ struct device * get_gw_dev(__u32 gw)
     /* [previous][next][first][last][top][bottom][index][help] */
 315 {
 316         struct fib_node * f;
 317         f = fib_lookup_gateway(gw);
 318         if (f)
 319                 return f->fib_info->fib_dev;
 320         return NULL;
 321 }
 322 
 323 /*
 324  *      Used by 'rt_add()' when we can't get the netmask any other way..
 325  *
 326  *      If the lower byte or two are zero, we guess the mask based on the
 327  *      number of zero 8-bit net numbers, otherwise we use the "default"
 328  *      masks judging by the destination address and our device netmask.
 329  */
 330  
 331 static __u32 unsigned long default_mask(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 332 {
 333         dst = ntohl(dst);
 334         if (IN_CLASSA(dst))
 335                 return htonl(IN_CLASSA_NET);
 336         if (IN_CLASSB(dst))
 337                 return htonl(IN_CLASSB_NET);
 338         return htonl(IN_CLASSC_NET);
 339 }
 340 
 341 
 342 /*
 343  *      If no mask is specified then generate a default entry.
 344  */
 345 
 346 static __u32 guess_mask(__u32 dst, struct device * dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 347 {
 348         __u32 mask;
 349 
 350         if (!dst)
 351                 return 0;
 352         mask = default_mask(dst);
 353         if ((dst ^ dev->pa_addr) & mask)
 354                 return mask;
 355         return dev->pa_mask;
 356 }
 357 
 358 
 359 /*
 360  *      Check if a mask is acceptable.
 361  */
 362  
 363 static inline int bad_mask(__u32 mask, __u32 addr)
     /* [previous][next][first][last][top][bottom][index][help] */
 364 {
 365         if (addr & (mask = ~mask))
 366                 return 1;
 367         mask = ntohl(mask);
 368         if (mask & (mask+1))
 369                 return 1;
 370         return 0;
 371 }
 372 
 373 
 374 static int fib_del_list(struct fib_node **fp, __u32 dst,
     /* [previous][next][first][last][top][bottom][index][help] */
 375                 struct device * dev, __u32 gtw, short flags, short metric, __u32 mask)
 376 {
 377         struct fib_node *f;
 378         int found=0;
 379 
 380         while((f = *fp) != NULL) 
 381         {
 382                 struct fib_info * fi = f->fib_info;
 383 
 384                 /*
 385                  *      Make sure the destination and netmask match.
 386                  *      metric, gateway and device are also checked
 387                  *      if they were specified.
 388                  */
 389                 if (f->fib_dst != dst ||
 390                     (gtw && fi->fib_gateway != gtw) ||
 391                     (metric >= 0 && f->fib_metric != metric) ||
 392                     (dev && fi->fib_dev != dev) )
 393                 {
 394                         fp = &f->fib_next;
 395                         continue;
 396                 }
 397                 cli();
 398                 *fp = f->fib_next;
 399                 if (fib_loopback == f)
 400                         fib_loopback = NULL;
 401                 sti();
 402                 ip_netlink_msg(RTMSG_DELROUTE, dst, gtw, mask, flags, metric, fi->fib_dev->name);
 403                 fib_free_node(f);
 404                 found++;
 405         }
 406         return found;
 407 }
 408 
 409 static __inline__ int fib_del_1(__u32 dst, __u32 mask,
     /* [previous][next][first][last][top][bottom][index][help] */
 410                 struct device * dev, __u32 gtw, short flags, short metric)
 411 {
 412         struct fib_node **fp;
 413         struct fib_zone *fz;
 414         int found=0;
 415 
 416         if (!mask)
 417         {
 418                 for (fz=fib_zone_list; fz; fz = fz->fz_next)
 419                 {
 420                         int tmp;
 421                         if (fz->fz_hash_table)
 422                                 fp = &fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
 423                         else
 424                                 fp = &fz->fz_list;
 425 
 426                         tmp = fib_del_list(fp, dst, dev, gtw, flags, metric, mask);
 427                         fz->fz_nent -= tmp;
 428                         found += tmp;
 429                 }
 430         } 
 431         else
 432         {
 433                 if ((fz = fib_zones[rt_logmask(mask)]) != NULL)
 434                 {
 435                         if (fz->fz_hash_table)
 436                                 fp = &fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
 437                         else
 438                                 fp = &fz->fz_list;
 439         
 440                         found = fib_del_list(fp, dst, dev, gtw, flags, metric, mask);
 441                         fz->fz_nent -= found;
 442                 }
 443         }
 444 
 445         if (found)
 446         {
 447                 rt_cache_flush();
 448                 return 0;
 449         }
 450         return -ESRCH;
 451 }
 452 
 453 
 454 static struct fib_info * fib_create_info(__u32 gw, struct device * dev,
     /* [previous][next][first][last][top][bottom][index][help] */
 455                                          unsigned short flags, unsigned short mss,
 456                                          unsigned long window, unsigned short irtt)
 457 {
 458         struct fib_info * fi;
 459 
 460         if (!(flags & RTF_MSS))
 461         {
 462                 mss = dev->mtu;
 463 #ifdef CONFIG_NO_PATH_MTU_DISCOVERY
 464                 /*
 465                  *      If MTU was not specified, use default.
 466                  *      If you want to increase MTU for some net (local subnet)
 467                  *      use "route add .... mss xxx".
 468                  *
 469                  *      The MTU isn't currently always used and computed as it
 470                  *      should be as far as I can tell. [Still verifying this is right]
 471                  */
 472                 if ((flags & RTF_GATEWAY) && mss > 576)
 473                         mss = 576;
 474 #endif
 475         }
 476         if (!(flags & RTF_WINDOW))
 477                 window = 0;
 478         if (!(flags & RTF_IRTT))
 479                 irtt = 0;
 480 
 481         for (fi=fib_info_list; fi; fi = fi->fib_next)
 482         {
 483                 if (fi->fib_gateway != gw ||
 484                     fi->fib_dev != dev  ||
 485                     fi->fib_flags != flags ||
 486                     fi->fib_mtu != mss ||
 487                     fi->fib_window != window ||
 488                     fi->fib_irtt != irtt)
 489                         continue;
 490                 fi->fib_refcnt++;
 491 #if RT_CACHE_DEBUG >= 2
 492                 printk("fib_create_info: fi %08x/%s is duplicate\n", fi->fib_gateway, fi->fib_dev->name);
 493 #endif
 494                 return fi;
 495         }
 496         fi = (struct fib_info*)kmalloc(sizeof(struct fib_info), GFP_KERNEL);
 497         if (!fi)
 498                 return NULL;
 499         memset(fi, 0, sizeof(struct fib_info));
 500         fi->fib_flags = flags;
 501         fi->fib_dev = dev;
 502         fi->fib_gateway = gw;
 503         fi->fib_mtu = mss;
 504         fi->fib_window = window;
 505         fi->fib_refcnt++;
 506         fi->fib_next = fib_info_list;
 507         fi->fib_prev = NULL;
 508         fi->fib_irtt = irtt;
 509         if (fib_info_list)
 510                 fib_info_list->fib_prev = fi;
 511         fib_info_list = fi;
 512 #if RT_CACHE_DEBUG >= 2
 513         printk("fib_create_info: fi %08x/%s is created\n", fi->fib_gateway, fi->fib_dev->name);
 514 #endif
 515         return fi;
 516 }
 517 
 518 
 519 static __inline__ void fib_add_1(short flags, __u32 dst, __u32 mask,
     /* [previous][next][first][last][top][bottom][index][help] */
 520         __u32 gw, struct device *dev, unsigned short mss,
 521         unsigned long window, unsigned short irtt, short metric)
 522 {
 523         struct fib_node *f, *f1;
 524         struct fib_node **fp;
 525         struct fib_node **dup_fp = NULL;
 526         struct fib_zone * fz;
 527         struct fib_info * fi;
 528         int logmask;
 529 
 530         if (flags & RTF_HOST) 
 531                 mask = 0xffffffff;
 532         /*
 533          * If mask is not specified, try to guess it.
 534          */
 535         else if (!mask)
 536         {
 537                 if (!((dst ^ dev->pa_addr) & dev->pa_mask)) 
 538                 {
 539                         mask = dev->pa_mask;
 540                         flags &= ~RTF_GATEWAY;
 541                         if (flags & RTF_DYNAMIC) 
 542                         {
 543                                 printk("Dynamic route to my own net rejected\n");
 544                                 return;
 545                         }
 546                 } 
 547                 else
 548                         mask = guess_mask(dst, dev);
 549                 dst &= mask;
 550         }
 551         
 552         /*
 553          *      A gateway must be reachable and not a local address
 554          */
 555          
 556         if (gw == dev->pa_addr)
 557                 flags &= ~RTF_GATEWAY;
 558                 
 559         if (flags & RTF_GATEWAY) 
 560         {
 561                 /*
 562                  *      Don't try to add a gateway we can't reach.. 
 563                  *      Tunnel devices are exempt from this rule.
 564                  */
 565                  
 566                 if ((dev != get_gw_dev(gw)) && dev->type!=ARPHRD_TUNNEL)
 567                         return;
 568                         
 569                 flags |= RTF_GATEWAY;
 570         } 
 571         else
 572                 gw = 0;
 573                 
 574         /*
 575          *      Allocate an entry and fill it in.
 576          */
 577          
 578         f = (struct fib_node *) kmalloc(sizeof(struct fib_node), GFP_KERNEL);
 579         if (f == NULL)
 580                 return;
 581 
 582         memset(f, 0, sizeof(struct fib_node));
 583         f->fib_dst = dst;
 584         f->fib_metric = metric;
 585         f->fib_tos    = 0;
 586 
 587         if  ((fi = fib_create_info(gw, dev, flags, mss, window, irtt)) == NULL)
 588         {
 589                 kfree_s(f, sizeof(struct fib_node));
 590                 return;
 591         }
 592         f->fib_info = fi;
 593 
 594         logmask = rt_logmask(mask);
 595         fz = fib_zones[logmask];
 596 
 597 
 598         if (!fz)
 599         {
 600                 int i;
 601                 fz = kmalloc(sizeof(struct fib_zone), GFP_KERNEL);
 602                 if (!fz)
 603                 {
 604                         fib_free_node(f);
 605                         return;
 606                 }
 607                 memset(fz, 0, sizeof(struct fib_zone));
 608                 fz->fz_logmask = logmask;
 609                 fz->fz_mask = mask;
 610                 for (i=logmask-1; i>=0; i--)
 611                         if (fib_zones[i])
 612                                 break;
 613                 cli();
 614                 if (i<0)
 615                 {
 616                         fz->fz_next = fib_zone_list;
 617                         fib_zone_list = fz;
 618                 }
 619                 else
 620                 {
 621                         fz->fz_next = fib_zones[i]->fz_next;
 622                         fib_zones[i]->fz_next = fz;
 623                 }
 624                 fib_zones[logmask] = fz;
 625                 sti();
 626         }
 627 
 628         /*
 629          * If zone overgrows RTZ_HASHING_LIMIT, create hash table.
 630          */
 631 
 632         if (fz->fz_nent >= RTZ_HASHING_LIMIT && !fz->fz_hash_table && logmask<32)
 633         {
 634                 struct fib_node ** ht;
 635 #if RT_CACHE_DEBUG
 636                 printk("fib_add_1: hashing for zone %d started\n", logmask);
 637 #endif
 638                 ht = kmalloc(RTZ_HASH_DIVISOR*sizeof(struct rtable*), GFP_KERNEL);
 639 
 640                 if (ht)
 641                 {
 642                         memset(ht, 0, RTZ_HASH_DIVISOR*sizeof(struct fib_node*));
 643                         cli();
 644                         f1 = fz->fz_list;
 645                         while (f1)
 646                         {
 647                                 struct fib_node * next;
 648                                 unsigned hash = fz_hash_code(f1->fib_dst, logmask);
 649                                 next = f1->fib_next;
 650                                 f1->fib_next = ht[hash];
 651                                 ht[hash] = f1;
 652                                 f1 = next;
 653                         }
 654                         fz->fz_list = NULL;
 655                         fz->fz_hash_table = ht; 
 656                         sti();
 657                 }
 658         }
 659 
 660         if (fz->fz_hash_table)
 661                 fp = &fz->fz_hash_table[fz_hash_code(dst, logmask)];
 662         else
 663                 fp = &fz->fz_list;
 664 
 665         /*
 666          * Scan list to find the first route with the same destination
 667          */
 668         while ((f1 = *fp) != NULL)
 669         {
 670                 if (f1->fib_dst == dst)
 671                         break;
 672                 fp = &f1->fib_next;
 673         }
 674 
 675         /*
 676          * Find route with the same destination and less (or equal) metric.
 677          */
 678         while ((f1 = *fp) != NULL && f1->fib_dst == dst)
 679         {
 680                 if (f1->fib_metric >= metric)
 681                         break;
 682                 /*
 683                  *      Record route with the same destination and gateway,
 684                  *      but less metric. We'll delete it 
 685                  *      after instantiation of new route.
 686                  */
 687                 if (f1->fib_info->fib_gateway == gw)
 688                         dup_fp = fp;
 689                 fp = &f1->fib_next;
 690         }
 691 
 692         /*
 693          * Is it already present?
 694          */
 695 
 696         if (f1 && f1->fib_metric == metric && f1->fib_info == fi)
 697         {
 698                 fib_free_node(f);
 699                 return;
 700         }
 701         
 702         /*
 703          * Insert new entry to the list.
 704          */
 705 
 706         cli();
 707         f->fib_next = f1;
 708         *fp = f;
 709         if (!fib_loopback && (fi->fib_dev->flags & IFF_LOOPBACK))
 710                 fib_loopback = f;
 711         sti();
 712         fz->fz_nent++;
 713         ip_netlink_msg(RTMSG_NEWROUTE, dst, gw, mask, flags, metric, fi->fib_dev->name);
 714 
 715         /*
 716          *      Delete route with the same destination and gateway.
 717          *      Note that we should have at most one such route.
 718          */
 719         if (dup_fp)
 720                 fp = dup_fp;
 721         else
 722                 fp = &f->fib_next;
 723 
 724         while ((f1 = *fp) != NULL && f1->fib_dst == dst)
 725         {
 726                 if (f1->fib_info->fib_gateway == gw)
 727                 {
 728                         cli();
 729                         *fp = f1->fib_next;
 730                         if (fib_loopback == f1)
 731                                 fib_loopback = NULL;
 732                         sti();
 733                         ip_netlink_msg(RTMSG_DELROUTE, dst, gw, mask, flags, metric, f1->fib_info->fib_dev->name);
 734                         fib_free_node(f1);
 735                         fz->fz_nent--;
 736                         break;
 737                 }
 738                 fp = &f1->fib_next;
 739         }
 740         rt_cache_flush();
 741         return;
 742 }
 743 
 744 static int rt_flush_list(struct fib_node ** fp, struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 745 {
 746         int found = 0;
 747         struct fib_node *f;
 748 
 749         while ((f = *fp) != NULL) {
 750                 if (f->fib_info->fib_dev != dev) {
 751                         fp = &f->fib_next;
 752                         continue;
 753                 }
 754                 cli();
 755                 *fp = f->fib_next;
 756                 if (fib_loopback == f)
 757                         fib_loopback = NULL;
 758                 sti();
 759                 fib_free_node(f);
 760                 found++;
 761         }
 762         return found;
 763 }
 764 
 765 static __inline__ void fib_flush_1(struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 766 {
 767         struct fib_zone *fz;
 768         int found = 0;
 769 
 770         for (fz = fib_zone_list; fz; fz = fz->fz_next)
 771         {
 772                 if (fz->fz_hash_table)
 773                 {
 774                         int i;
 775                         int tmp = 0;
 776                         for (i=0; i<RTZ_HASH_DIVISOR; i++)
 777                                 tmp += rt_flush_list(&fz->fz_hash_table[i], dev);
 778                         fz->fz_nent -= tmp;
 779                         found += tmp;
 780                 }
 781                 else
 782                 {
 783                         int tmp;
 784                         tmp = rt_flush_list(&fz->fz_list, dev);
 785                         fz->fz_nent -= tmp;
 786                         found += tmp;
 787                 }
 788         }
 789                 
 790         if (found)
 791                 rt_cache_flush();
 792 }
 793 
 794 
 795 /* 
 796  *      Called from the PROCfs module. This outputs /proc/net/route.
 797  *
 798  *      We preserve the old format but pad the buffers out. This means that
 799  *      we can spin over the other entries as we read them. Remember the
 800  *      gated BGP4 code could need to read 60,000+ routes on occasion (thats
 801  *      about 7Mb of data). To do that ok we will need to also cache the
 802  *      last route we got to (reads will generally be following on from
 803  *      one another without gaps).
 804  */
 805  
/*
 *	/proc/net/route read handler (old procfs get_info convention:
 *	fill 'buffer', set *start to the byte matching 'offset', and
 *	return the number of valid bytes).  Every route is padded to a
 *	fixed 128-byte record so a reader can resume at any 128-byte
 *	boundary; see the comment above about gated reading 60,000+
 *	routes in successive chunks.
 */
int rt_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
{
	struct fib_zone *fz;
	struct fib_node *f;
	int len=0;
	off_t pos=0;
	char temp[129];		/* one 128-byte record plus the NUL */
	int i;
	
	pos = 128;

	if (offset<128)
	{
		/* The column-header line is record 0. */
		sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT");
		len = 128;
	}
	
	/* User context: wait out any writer, then take a fast lock ref. */
	while  (ip_rt_lock)
		sleep_on(&rt_wait);
	ip_rt_fast_lock();

	for (fz=fib_zone_list; fz; fz = fz->fz_next)
	{
		int maxslot;
		struct fib_node ** fp;

		if (fz->fz_nent == 0)
			continue;

		/* Whole zone lies before the requested offset: skip in O(1). */
		if (pos + 128*fz->fz_nent <= offset)
		{
			pos += 128*fz->fz_nent;
			len = 0;
			continue;
		}

		/* A zone keeps its nodes either hashed or in a single list. */
		if (fz->fz_hash_table)
		{
			maxslot = RTZ_HASH_DIVISOR;
			fp	= fz->fz_hash_table;
		}
		else
		{
			maxslot = 1;
			fp	= &fz->fz_list;
		}
			
		for (i=0; i < maxslot; i++, fp++)
		{
			
			for (f = *fp; f; f = f->fib_next) 
			{
				struct fib_info * fi;
				/*
				 *	Spin through entries until we are ready
				 */
				pos += 128;

				if (pos <= offset)
				{
					len=0;
					continue;
				}
					
				/* Format one route, then pad to 128 bytes.
				   RefCnt is always printed as 0 here. */
				fi = f->fib_info;
				sprintf(temp, "%s\t%08lX\t%08lX\t%02X\t%d\t%lu\t%d\t%08lX\t%d\t%lu\t%u",
					fi->fib_dev->name, (unsigned long)f->fib_dst, (unsigned long)fi->fib_gateway,
					fi->fib_flags, 0, f->fib_use, f->fib_metric,
					(unsigned long)fz->fz_mask, (int)fi->fib_mtu, fi->fib_window, (int)fi->fib_irtt);
				sprintf(buffer+len,"%-127s\n",temp);

				len += 128;
				if (pos >= offset+length)
					goto done;
			}
		}
	}

done:
	ip_rt_unlock();
	wake_up(&rt_wait);
	
	/* Translate the absolute position back into the caller's window. */
	*start = buffer+len-(pos-offset);
	len = pos - offset;
	if (len>length)
		len = length;
	return len;
}
 894 
/*
 *	/proc/net/rt_cache read handler.  Same fixed 128-byte record
 *	scheme and *start/offset convention as rt_get_info() above, but
 *	walks the route cache hash table instead of the FIB, and adds
 *	hardware-header cache state (HH refcount / up-to-date flag,
 *	printed as -1 / 0 when no hh entry is attached).
 */
int rt_cache_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
{
	int len=0;
	off_t pos=0;
	char temp[129];
	struct rtable *r;
	int i;

	pos = 128;

	if (offset<128)
	{
		/* The column-header line is record 0. */
		sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tSource\t\tMTU\tWindow\tIRTT\tHH\tARP");
		len = 128;
	}
	
	
	/* User context: wait out any writer, then take a fast lock ref. */
	while  (ip_rt_lock)
		sleep_on(&rt_wait);
	ip_rt_fast_lock();

	for (i = 0; i<RT_HASH_DIVISOR; i++)
	{
		for (r = ip_rt_hash_table[i]; r; r = r->rt_next) 
		{
			/*
			 *	Spin through entries until we are ready
			 */
			pos += 128;

			if (pos <= offset)
			{
				len = 0;
				continue;
			}
					
			/* Metric is always printed as 0 for cache entries. */
			sprintf(temp, "%s\t%08lX\t%08lX\t%02X\t%d\t%u\t%d\t%08lX\t%d\t%lu\t%u\t%d\t%1d",
				r->rt_dev->name, (unsigned long)r->rt_dst, (unsigned long)r->rt_gateway,
				r->rt_flags, r->rt_refcnt, r->rt_use, 0,
				(unsigned long)r->rt_src, (int)r->rt_mtu, r->rt_window, (int)r->rt_irtt, r->rt_hh ? r->rt_hh->hh_refcnt : -1, r->rt_hh ? r->rt_hh->hh_uptodate : 0);
			sprintf(buffer+len,"%-127s\n",temp);
			len += 128;
			if (pos >= offset+length)
				goto done;
		}
	}

done:
	ip_rt_unlock();
	wake_up(&rt_wait);
	
	/* Translate the absolute position back into the caller's window. */
	*start = buffer+len-(pos-offset);
	len = pos-offset;
	if (len>length)
		len = length;
	return len;
}
 952 
 953 
/*
 *	Release one cache entry.  If nobody holds a reference, the entry
 *	(and, on last use, its hardware-header cache) is freed at once.
 *	Otherwise it is parked on rt_free_queue with RTF_UP cleared, and
 *	the RT_BH_FREE bottom-half bit is set so rt_kick_free_queue()
 *	reaps it once the last reference is dropped.
 */
static void rt_free(struct rtable * rt)
{
	unsigned long flags;

	save_flags(flags);
	cli();
	if (!rt->rt_refcnt)
	{
		struct hh_cache * hh = rt->rt_hh;
		rt->rt_hh = NULL;
		if (hh && !--hh->hh_refcnt)
		{
			/* Re-enable interrupts before entering the allocator. */
			restore_flags(flags);
			kfree_s(hh, sizeof(struct hh_cache));
		}
		/* NOTE(review): frees with sizeof(struct rt_table) although
		 * the object is allocated as sizeof(struct rtable) elsewhere
		 * (see ip_rt_slow_route) -- confirm both names denote the
		 * same structure/size, else kfree_s gets a wrong size. */
		restore_flags(flags);
		kfree_s(rt, sizeof(struct rt_table));
		return;
	}
	/* Still referenced: queue for deferred freeing by the bh. */
	rt->rt_next = rt_free_queue;
	rt->rt_flags &= ~RTF_UP;
	rt_free_queue = rt;
	ip_rt_bh_mask |= RT_BH_FREE;
#if RT_CACHE_DEBUG >= 2
	printk("rt_free: %08x\n", rt->rt_dst);
#endif
	restore_flags(flags);
}
 982 
 983 /*
 984  * RT "bottom half" handlers. Called with masked interrupts.
 985  */
 986 
/*
 *	RT_BH_FREE handler: walk rt_free_queue and release every entry
 *	whose last reference has gone.  Entered with interrupts masked;
 *	interrupts are briefly re-enabled (sti) around each kfree and
 *	masked again (cli) before the list walk resumes.
 */
static __inline__ void rt_kick_free_queue(void)
{
	struct rtable *rt, **rtp;

	rtp = &rt_free_queue;

	while ((rt = *rtp) != NULL)
	{
		if  (!rt->rt_refcnt)
		{
			struct hh_cache * hh = rt->rt_hh;
#if RT_CACHE_DEBUG >= 2
			__u32 daddr = rt->rt_dst;
#endif
			/* Unlink first; free the hh entry on its last use. */
			*rtp = rt->rt_next;
			rt->rt_hh = NULL;
			if (hh && !--hh->hh_refcnt)
			{
				sti();
				kfree_s(hh, sizeof(struct hh_cache));
			}
			sti();
			/* NOTE(review): sizeof(struct rt_table) here, but the
			 * entry was allocated as struct rtable -- verify the
			 * two names refer to the same size (cf. rt_free). */
			kfree_s(rt, sizeof(struct rt_table));
#if RT_CACHE_DEBUG >= 2
			printk("rt_kick_free_queue: %08x is free\n", daddr);
#endif
			cli();
			continue;
		}
		/* Still referenced: leave it queued and advance. */
		rtp = &rt->rt_next;
	}
}
1019 
/*
 *	Routing bottom half: runs whatever deferred work is flagged in
 *	ip_rt_bh_mask (redirect backlog, garbage collection, free
 *	queue), but only when nobody currently holds the routing lock.
 */
void ip_rt_run_bh() {
	unsigned long flags;
	save_flags(flags);
	cli();
	if (ip_rt_bh_mask && !ip_rt_lock)
	{
		if (ip_rt_bh_mask & RT_BH_REDIRECT)
			rt_kick_backlog();

		if (ip_rt_bh_mask & RT_BH_GARBAGE_COLLECT)
		{
			/* Collect with the lock held and interrupts on;
			   the flag is cleared first so a request raised
			   during collection is not lost. */
			ip_rt_fast_lock();
			ip_rt_bh_mask &= ~RT_BH_GARBAGE_COLLECT;
			sti();
			rt_garbage_collect_1();
			cli();
			ip_rt_fast_unlock();
		}

		if (ip_rt_bh_mask & RT_BH_FREE)
			rt_kick_free_queue();
	}
	restore_flags(flags);
}
1044 
1045 
/*
 *	Periodic route-cache maintenance (presumably invoked from a
 *	timer -- confirm against the caller).  Only does real work when
 *	this is the sole lock holder (ip_rt_lock == 1 after our fast
 *	lock); otherwise the pass is simply skipped.  Two jobs per
 *	hash chain:
 *	  1. free unreferenced entries idle longer than RT_CACHE_TIMEOUT;
 *	  2. a single bubble-step that moves fresher/busier entries
 *	     towards the chain head, approximating LRU order.
 */
void ip_rt_check_expire()
{
	ip_rt_fast_lock();
	if (ip_rt_lock == 1)
	{
		int i;
		struct rtable *rth, **rthp;
		unsigned long flags;
		unsigned long now = jiffies;

		save_flags(flags);
		for (i=0; i<RT_HASH_DIVISOR; i++)
		{
			rthp = &ip_rt_hash_table[i];

			while ((rth = *rthp) != NULL)
			{
				struct rtable * rth_next = rth->rt_next;

				/*
				 * Cleanup aged off entries.
				 */

				cli();
				if (!rth->rt_refcnt && rth->rt_lastuse + RT_CACHE_TIMEOUT < now)
				{
					*rthp = rth_next;
					sti();
					rt_cache_size--;
#if RT_CACHE_DEBUG >= 2
					printk("rt_check_expire clean %02x@%08x\n", i, rth->rt_dst);
#endif
					rt_free(rth);
					continue;
				}
				sti();

				if (!rth_next)
					break;

				/*
				 * LRU ordering.
				 */

				/* Swap rth with its successor when the successor
				   is markedly fresher, or fresher and busier. */
				if (rth->rt_lastuse + RT_CACHE_BUBBLE_THRESHOLD < rth_next->rt_lastuse ||
				    (rth->rt_lastuse < rth_next->rt_lastuse &&
				     rth->rt_use < rth_next->rt_use))
				{
#if RT_CACHE_DEBUG >= 2
					printk("rt_check_expire bubbled %02x@%08x<->%08x\n", i, rth->rt_dst, rth_next->rt_dst);
#endif
					cli();
					*rthp = rth_next;
					rth->rt_next = rth_next->rt_next;
					rth_next->rt_next = rth;
					sti();
					rthp = &rth_next->rt_next;
					continue;
				}
				rthp = &rth->rt_next;
			}
		}
		restore_flags(flags);
		rt_kick_free_queue();
	}
	ip_rt_unlock();
}
1113 
/*
 *	Install a host route learned from an ICMP redirect into the
 *	route cache (the FIB is left untouched).  Sanity checks: the
 *	new gateway must not be our own address on this device, and it
 *	must be reachable through the device the redirect names.
 *	Called with the routing lock held.
 */
static void rt_redirect_1(__u32 dst, __u32 gw, struct device *dev)
{
	struct rtable *rt;
	unsigned long hash = ip_rt_hash_code(dst);

	if (gw == dev->pa_addr)
		return;
	if (dev != get_gw_dev(gw))
		return;
	rt = (struct rtable *) kmalloc(sizeof(struct rtable), GFP_ATOMIC);
	if (rt == NULL) 
		return;
	memset(rt, 0, sizeof(struct rtable));
	rt->rt_flags = RTF_DYNAMIC | RTF_MODIFIED | RTF_HOST | RTF_GATEWAY | RTF_UP;
	rt->rt_dst = dst;
	rt->rt_dev = dev;
	rt->rt_gateway = gw;
	rt->rt_src = dev->pa_addr;
	rt->rt_mtu = dev->mtu;
#ifdef CONFIG_NO_PATH_MTU_DISCOVERY
	/* Without PMTU discovery, be conservative on redirected paths. */
	if (dev->mtu > 576)
		rt->rt_mtu = 576;
#endif
	rt->rt_lastuse  = jiffies;
	rt->rt_refcnt  = 1;
	rt_cache_add(hash, rt);
	/* Drop our creation reference; the entry lives on in the cache. */
	ip_rt_put(rt);
	return;
}
1143 
/*
 *	Drop every entry in the route cache (used after FIB changes so
 *	stale cached routing decisions cannot be reused).  Each hash
 *	chain is detached atomically under cli(), then freed at leisure
 *	via rt_free(), which defers entries that are still referenced.
 */
static void rt_cache_flush(void)
{
	int i;
	struct rtable * rth, * next;

	for (i=0; i<RT_HASH_DIVISOR; i++)
	{
		int nr=0;

		cli();
		if (!(rth = ip_rt_hash_table[i]))
		{
			sti();
			continue;
		}

		/* Detach the whole chain while interrupts are masked. */
		ip_rt_hash_table[i] = NULL;
		sti();

		for (; rth; rth=next)
		{
			next = rth->rt_next;
			rt_cache_size--;
			nr++;
			rth->rt_next = NULL;
			rt_free(rth);
		}
#if RT_CACHE_DEBUG >= 2
		if (nr > 0)
			printk("rt_cache_flush: %d@%02x\n", nr, i);
#endif
	}
#if RT_CACHE_DEBUG >= 1
	/* Accounting self-check: the counter must now be zero. */
	if (rt_cache_size)
	{
		printk("rt_cache_flush: bug rt_cache_size=%d\n", rt_cache_size);
		rt_cache_size = 0;
	}
#endif
}
1184 
/*
 *	Shrink the route cache below RT_CACHE_SIZE_MAX.  Starts with a
 *	generous expiry time and halves it every full pass until enough
 *	entries are gone; referenced entries get their deadline scaled
 *	by (refcnt+1) so they survive longer.  At most one victim is
 *	taken per hash chain per pass.
 *	NOTE(review): if the cache cannot shrink, 'expire' is halved
 *	down to 0 -- confirm the loop always terminates in that case.
 */
static void rt_garbage_collect_1(void)
{
	int i;
	unsigned expire = RT_CACHE_TIMEOUT>>1;
	struct rtable * rth, **rthp;
	unsigned long now = jiffies;

	for (;;)
	{
		for (i=0; i<RT_HASH_DIVISOR; i++)
		{
			if (!ip_rt_hash_table[i])
				continue;
			for (rthp=&ip_rt_hash_table[i]; (rth=*rthp); rthp=&rth->rt_next)
			{
				if (rth->rt_lastuse + expire*(rth->rt_refcnt+1) > now)
					continue;
				rt_cache_size--;
				cli();
				*rthp=rth->rt_next;
				rth->rt_next = NULL;
				sti();
				rt_free(rth);
				break;
			}
		}
		if (rt_cache_size < RT_CACHE_SIZE_MAX)
			return;
		expire >>= 1;
	}
}
1216 
1217 static __inline__ void rt_req_enqueue(struct rt_req **q, struct rt_req *rtr)
     /* [previous][next][first][last][top][bottom][index][help] */
1218 {
1219         unsigned long flags;
1220         struct rt_req * tail;
1221 
1222         save_flags(flags);
1223         cli();
1224         tail = *q;
1225         if (!tail)
1226                 rtr->rtr_next = rtr;
1227         else
1228         {
1229                 rtr->rtr_next = tail->rtr_next;
1230                 tail->rtr_next = rtr;
1231         }
1232         *q = rtr;
1233         restore_flags(flags);
1234         return;
1235 }
1236 
1237 /*
1238  * Caller should mask interrupts.
1239  */
1240 
1241 static __inline__ struct rt_req * rt_req_dequeue(struct rt_req **q)
     /* [previous][next][first][last][top][bottom][index][help] */
1242 {
1243         struct rt_req * rtr;
1244 
1245         if (*q)
1246         {
1247                 rtr = (*q)->rtr_next;
1248                 (*q)->rtr_next = rtr->rtr_next;
1249                 if (rtr->rtr_next == rtr)
1250                         *q = NULL;
1251                 rtr->rtr_next = NULL;
1252                 return rtr;
1253         }
1254         return NULL;
1255 }
1256 
1257 /*
1258    Called with masked interrupts
1259  */
1260 
/*
 *	RT_BH_REDIRECT handler: replay the queued ICMP redirect requests
 *	through rt_redirect_1().  Entered with interrupts masked; they
 *	are re-enabled around each request.  Does nothing while someone
 *	holds the routing lock (ip_rt_run_bh will retry later).
 */
static void rt_kick_backlog()
{
	if (!ip_rt_lock)
	{
		struct rt_req * rtr;

		ip_rt_fast_lock();

		/* Drain the backlog, running each redirect with irqs on. */
		while ((rtr = rt_req_dequeue(&rt_backlog)) != NULL)
		{
			sti();
			rt_redirect_1(rtr->dst, rtr->gw, rtr->dev);
			kfree_s(rtr, sizeof(struct rt_req));
			cli();
		}

		ip_rt_bh_mask &= ~RT_BH_REDIRECT;

		ip_rt_fast_unlock();
	}
}
1282 
1283 /*
1284  * rt_{del|add|flush} called only from USER process. Waiting is OK.
1285  */
1286 
1287 static int rt_del(__u32 dst, __u32 mask,
     /* [previous][next][first][last][top][bottom][index][help] */
1288                 struct device * dev, __u32 gtw, short rt_flags, short metric)
1289 {
1290         int retval;
1291 
1292         while (ip_rt_lock)
1293                 sleep_on(&rt_wait);
1294         ip_rt_fast_lock();
1295         retval = fib_del_1(dst, mask, dev, gtw, rt_flags, metric);
1296         ip_rt_unlock();
1297         wake_up(&rt_wait);
1298         return retval;
1299 }
1300 
/*
 *	User-context route insertion: serialise against other routing
 *	table writers, then let fib_add_1() do the actual update.
 */
static void rt_add(short flags, __u32 dst, __u32 mask,
	__u32 gw, struct device *dev, unsigned short mss,
	unsigned long window, unsigned short irtt, short metric)
{
	/* Sleep until nobody else holds the routing lock. */
	while (ip_rt_lock)
		sleep_on(&rt_wait);
	ip_rt_fast_lock();
	fib_add_1(flags, dst, mask, gw, dev, mss, window, irtt, metric);
	ip_rt_unlock();
	wake_up(&rt_wait);
}
1312 
/*
 *	Remove all FIB routes through 'dev' (e.g. when the interface
 *	goes away).  User context only: may sleep on the routing lock.
 */
void ip_rt_flush(struct device *dev)
{
	while (ip_rt_lock)
		sleep_on(&rt_wait);
	ip_rt_fast_lock();
	fib_flush_1(dev);
	ip_rt_unlock();
	wake_up(&rt_wait);
}
1322 
1323 /*
1324    Called by ICMP module.
1325  */
1326 
/*
 *	ICMP redirect entry point.  'src' is the router that sent the
 *	redirect, 'gw' the proposed new gateway for 'dst'.  The message
 *	is believed only if the current route to dst really goes via
 *	src on this device, gw lies on the device's own subnet, and gw
 *	is not recognised as a local/special address (ip_chk_addr).
 *	If we are the sole lock holder the cache entry is installed
 *	immediately; otherwise the request is queued for the
 *	RT_BH_REDIRECT bottom half to replay.
 */
void ip_rt_redirect(__u32 src, __u32 dst, __u32 gw, struct device *dev)
{
	struct rt_req * rtr;
	struct rtable * rt;

	rt = ip_rt_route(dst, 0);
	if (!rt)
		return;

	if (rt->rt_gateway != src ||
	    rt->rt_dev != dev ||
	    ((gw^dev->pa_addr)&dev->pa_mask) ||
	    ip_chk_addr(gw))
	{
		ip_rt_put(rt);
		return;
	}
	ip_rt_put(rt);

	ip_rt_fast_lock();
	if (ip_rt_lock == 1)
	{
		/* Sole holder: apply the redirect right away. */
		rt_redirect_1(dst, gw, dev);
		ip_rt_unlock();
		return;
	}

	/* Contended: defer to the bottom half (best effort -- a failed
	   allocation silently drops the redirect). */
	rtr = kmalloc(sizeof(struct rt_req), GFP_ATOMIC);
	if (rtr)
	{
		rtr->dst = dst;
		rtr->gw = gw;
		rtr->dev = dev;
		rt_req_enqueue(&rt_backlog, rtr);
		ip_rt_bh_mask |= RT_BH_REDIRECT;
	}
	ip_rt_unlock();
}
1365 
1366 
1367 static __inline__ void rt_garbage_collect(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1368 {
1369         if (ip_rt_lock == 1)
1370         {
1371                 rt_garbage_collect_1();
1372                 return;
1373         }
1374         ip_rt_bh_mask |= RT_BH_GARBAGE_COLLECT;
1375 }
1376 
/*
 *	Insert 'rth' at the head of cache chain 'hash'.  Also:
 *	  - binds a hardware-header (hh) cache entry when the device
 *	    supports it, sharing the gateway route's hh entry for
 *	    indirect routes;
 *	  - kicks garbage collection when the cache is full;
 *	  - sweeps the rest of the chain, dropping older entries for
 *	    the same destination and aged-out unreferenced entries.
 *	Caller must hold the routing lock (debug builds check depth 1).
 */
static void rt_cache_add(unsigned hash, struct rtable * rth)
{
	unsigned long	flags;
	struct rtable	**rthp;
	__u32		daddr = rth->rt_dst;
	unsigned long	now = jiffies;

#if RT_CACHE_DEBUG >= 2
	if (ip_rt_lock != 1)
	{
		printk("rt_cache_add: ip_rt_lock==%d\n", ip_rt_lock);
		return;
	}
#endif

	save_flags(flags);

	if (rth->rt_dev->header_cache_bind)
	{
		struct rtable * rtg = rth;

		if (rth->rt_gateway != daddr)
		{
			/* Indirect route: the hh entry belongs to the
			   gateway's own cache entry.  Drop the lock while
			   resolving the gateway route. */
			ip_rt_fast_unlock();
			rtg = ip_rt_route(rth->rt_gateway, 0);
			ip_rt_fast_lock();
		}

		if (rtg)
		{
			if (rtg == rth)
				rtg->rt_dev->header_cache_bind(&rtg->rt_hh, rtg->rt_dev, ETH_P_IP, rtg->rt_dst);
			else
			{
				/* Share the gateway's hh entry. */
				if (rtg->rt_hh)
					ATOMIC_INCR(&rtg->rt_hh->hh_refcnt);
				rth->rt_hh = rtg->rt_hh;
				ip_rt_put(rtg);
			}
		}
	}

	if (rt_cache_size >= RT_CACHE_SIZE_MAX)
		rt_garbage_collect();

	/* Link at the chain head with interrupts masked. */
	cli();
	rth->rt_next = ip_rt_hash_table[hash];
#if RT_CACHE_DEBUG >= 2
	if (rth->rt_next)
	{
		struct rtable * trth;
		printk("rt_cache @%02x: %08x", hash, daddr);
		for (trth=rth->rt_next; trth; trth=trth->rt_next)
			printk(" . %08x", trth->rt_dst);
		printk("\n");
	}
#endif
	ip_rt_hash_table[hash] = rth;
	rthp = &rth->rt_next;
	sti();
	rt_cache_size++;

	/*
	 * Cleanup duplicate (and aged off) entries.
	 */

	while ((rth = *rthp) != NULL)
	{

		cli();
		if ((!rth->rt_refcnt && rth->rt_lastuse + RT_CACHE_TIMEOUT < now)
		    || rth->rt_dst == daddr)
		{
			*rthp = rth->rt_next;
			rt_cache_size--;
			sti();
#if RT_CACHE_DEBUG >= 2
			printk("rt_cache clean %02x@%08x\n", hash, rth->rt_dst);
#endif
			rt_free(rth);
			continue;
		}
		sti();
		rthp = &rth->rt_next;
	}
	restore_flags(flags);
}
1464 
1465 /*
1466    RT should be already locked.
1467    
1468    We could improve this by keeping a chain of say 32 struct rtable's
1469    last freed for fast recycling.
1470    
1471  */
1472 
/*
 *	Cache-miss path: consult the FIB and build a fresh cache entry
 *	for daddr.  'local' selects the local-delivery lookup and is
 *	also XORed into the cache hash so local and forwarded lookups
 *	cannot collide.  Called with the routing lock held; always
 *	releases it.  Returns an entry carrying one reference (release
 *	with ip_rt_put), or NULL on failure/reject.
 */
struct rtable * ip_rt_slow_route (__u32 daddr, int local)
{
	unsigned hash = ip_rt_hash_code(daddr)^local;
	struct rtable * rth;
	struct fib_node * f;
	struct fib_info * fi;
	__u32 saddr;

#if RT_CACHE_DEBUG >= 2
	printk("rt_cache miss @%08x\n", daddr);
#endif

	rth = kmalloc(sizeof(struct rtable), GFP_ATOMIC);
	if (!rth)
	{
		ip_rt_unlock();
		return NULL;
	}

	if (local)
		f = fib_lookup_local(daddr);
	else
		f = fib_lookup (daddr);

	if (f)
	{
		fi = f->fib_info;
		f->fib_use++;
	}

	/* No route, or an explicit reject route. */
	if (!f || (fi->fib_flags & RTF_REJECT))
	{
#ifdef CONFIG_KERNELD	
		char wanted_route[20];
#endif		
#if RT_CACHE_DEBUG >= 2
		printk("rt_route failed @%08x\n", daddr);
#endif
		ip_rt_unlock();
		kfree_s(rth, sizeof(struct rtable));
#ifdef CONFIG_KERNELD		
		/* Ask kerneld to try to establish the route dynamically. */
		daddr=ntohl(daddr);
		sprintf(wanted_route, "%d.%d.%d.%d",
			(int)(daddr >> 24) & 0xff, (int)(daddr >> 16) & 0xff,
			(int)(daddr >> 8) & 0xff, (int)daddr & 0xff);
		kerneld_route(wanted_route);	/* Dynamic route request */
#endif		
		return NULL;
	}

	saddr = fi->fib_dev->pa_addr;

	/* Destination is our own address: deliver via the loopback
	   route instead, keeping the source address chosen above. */
	if (daddr == fi->fib_dev->pa_addr)
	{
		f->fib_use--;
		if ((f = fib_loopback) != NULL)
		{
			f->fib_use++;
			fi = f->fib_info;
		}
	}
	
	if (!f)
	{
		ip_rt_unlock();
		kfree_s(rth, sizeof(struct rtable));
		return NULL;
	}

	rth->rt_dst	= daddr;
	rth->rt_src	= saddr;
	rth->rt_lastuse	= jiffies;
	rth->rt_refcnt	= 1;
	rth->rt_use	= 1;
	rth->rt_next	= NULL;
	rth->rt_hh	= NULL;
	rth->rt_gateway	= fi->fib_gateway;
	rth->rt_dev	= fi->fib_dev;
	rth->rt_mtu	= fi->fib_mtu;
	rth->rt_window	= fi->fib_window;
	rth->rt_irtt	= fi->fib_irtt;
	rth->rt_tos	= f->fib_tos;
	rth->rt_flags   = fi->fib_flags | RTF_HOST;
	if (local)
		rth->rt_flags   |= RTF_LOCAL;

	/* For direct routes the "gateway" is the destination itself. */
	if (!(rth->rt_flags & RTF_GATEWAY))
		rth->rt_gateway = rth->rt_dst;
	/*
	 *	Multicast or limited broadcast is never gatewayed.
	 */
	if (MULTICAST(daddr) || daddr == 0xFFFFFFFF)
		rth->rt_gateway = rth->rt_dst;

	if (ip_rt_lock == 1)
		rt_cache_add(hash, rth);
	else
	{
		/* Lock was re-entered meanwhile: the entry may already be
		   stale, so hand the caller an uncached route and let
		   rt_free queue it for disposal once released. */
		rt_free(rth);
#if RT_CACHE_DEBUG >= 1
		printk("rt_cache: route to %08x was born dead\n", daddr);
#endif
	}

	ip_rt_unlock();
	return rth;
}
1580 
1581 void ip_rt_put(struct rtable * rt)
     /* [previous][next][first][last][top][bottom][index][help] */
1582 {
1583         if (rt)
1584                 ATOMIC_DECR(&rt->rt_refcnt);
1585 }
1586 
1587 struct rtable * ip_rt_route(__u32 daddr, int local)
     /* [previous][next][first][last][top][bottom][index][help] */
1588 {
1589         struct rtable * rth;
1590 
1591         ip_rt_fast_lock();
1592 
1593         for (rth=ip_rt_hash_table[ip_rt_hash_code(daddr)^local]; rth; rth=rth->rt_next)
1594         {
1595                 if (rth->rt_dst == daddr)
1596                 {
1597                         rth->rt_lastuse = jiffies;
1598                         ATOMIC_INCR(&rth->rt_use);
1599                         ATOMIC_INCR(&rth->rt_refcnt);
1600                         ip_rt_unlock();
1601                         return rth;
1602                 }
1603         }
1604         return ip_rt_slow_route (daddr, local);
1605 }
1606 
1607 /*
1608  *      Process a route add request from the user, or from a kernel
1609  *      task.
1610  */
1611  
/*
 *	Validate a user-supplied rtentry (SIOCADDRT path) and insert the
 *	route via rt_add().  Returns 0 or a negative errno.  The device
 *	name, if given, is copied from user space with getname().
 */
int ip_rt_new(struct rtentry *r)
{
	int err;
	char * devname;
	struct device * dev = NULL;
	unsigned long flags;
	__u32 daddr, mask, gw;
	short metric;

	/*
	 *	If a device is specified find it.
	 */
	 
	if ((devname = r->rt_dev) != NULL) 
	{
		err = getname(devname, &devname);
		if (err)
			return err;
		dev = dev_get(devname);
		putname(devname);
		if (!dev)
			return -ENODEV;
	}
	
	/*
	 *	If the device isn't INET, don't allow it
	 */

	if (r->rt_dst.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/*
	 *	Make local copies of the important bits
	 *	We decrement the metric by one for BSD compatibility.
	 */
	 
	flags = r->rt_flags;
	daddr = (__u32) ((struct sockaddr_in *) &r->rt_dst)->sin_addr.s_addr;
	mask  = (__u32) ((struct sockaddr_in *) &r->rt_genmask)->sin_addr.s_addr;
	gw    = (__u32) ((struct sockaddr_in *) &r->rt_gateway)->sin_addr.s_addr;
	metric = r->rt_metric > 0 ? r->rt_metric - 1 : 0;

	/*
	 *	BSD emulation: Permits route add someroute gw one-of-my-addresses
	 *	to indicate which iface. Not as clean as the nice Linux dev technique
	 *	but people keep using it...  (and gated likes it ;))
	 */
	 
	if (!dev && (flags & RTF_GATEWAY)) 
	{
		struct device *dev2;
		for (dev2 = dev_base ; dev2 != NULL ; dev2 = dev2->next) 
		{
			if ((dev2->flags & IFF_UP) && dev2->pa_addr == gw) 
			{
				/* "Gateway" is really one of our own
				   addresses: route directly out of that
				   interface instead. */
				flags &= ~RTF_GATEWAY;
				dev = dev2;
				break;
			}
		}
	}

	/*
	 *	Ignore faulty masks
	 */
	 
	if (bad_mask(mask, daddr))
		mask=0;

	/*
	 *	Set the mask to nothing for host routes.
	 */
	 
	if (flags & RTF_HOST)
		mask = 0xffffffff;
	else if (mask && r->rt_genmask.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/*
	 *	You can only gateway IP via IP..
	 */
	 
	if (flags & RTF_GATEWAY) 
	{
		if (r->rt_gateway.sa_family != AF_INET)
			return -EAFNOSUPPORT;
		if (!dev)
			dev = get_gw_dev(gw);
	} 
	else if (!dev)
		dev = ip_dev_check(daddr);

	/*
	 *	Unknown device.
	 */
	 
	if (dev == NULL)
		return -ENETUNREACH;

	/*
	 *	Add the route
	 */

	rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window, r->rt_irtt, metric);
	return 0;
}
1718 
1719 
1720 /*
1721  *      Remove a route, as requested by the user.
1722  */
1723 
1724 int ip_rt_kill(struct rtentry *r)
     /* [previous][next][first][last][top][bottom][index][help] */
1725 {
1726         struct sockaddr_in *trg;
1727         struct sockaddr_in *msk;
1728         struct sockaddr_in *gtw;
1729         char *devname;
1730         int err;
1731         struct device * dev = NULL;
1732 
1733         trg = (struct sockaddr_in *) &r->rt_dst;
1734         msk = (struct sockaddr_in *) &r->rt_genmask;
1735         gtw = (struct sockaddr_in *) &r->rt_gateway;
1736         if ((devname = r->rt_dev) != NULL) 
1737         {
1738                 err = getname(devname, &devname);
1739                 if (err)
1740                         return err;
1741                 dev = dev_get(devname);
1742                 putname(devname);
1743                 if (!dev)
1744                         return -ENODEV;
1745         }
1746         /*
1747          * metric can become negative here if it wasn't filled in
1748          * but that's a fortunate accident; we really use that in rt_del.
1749          */
1750         err=rt_del((__u32)trg->sin_addr.s_addr, (__u32)msk->sin_addr.s_addr, dev,
1751                 (__u32)gtw->sin_addr.s_addr, r->rt_flags, r->rt_metric - 1);
1752         return err;
1753 }
1754 
1755 /*
1756  *      Handle IP routing ioctl calls. These are used to manipulate the routing tables
1757  */
1758  
1759 int ip_rt_ioctl(unsigned int cmd, void *arg)
     /* [previous][next][first][last][top][bottom][index][help] */
1760 {
1761         int err;
1762         struct rtentry rt;
1763 
1764         switch(cmd) 
1765         {
1766                 case SIOCADDRT:         /* Add a route */
1767                 case SIOCDELRT:         /* Delete a route */
1768                         if (!suser())
1769                                 return -EPERM;
1770                         err=verify_area(VERIFY_READ, arg, sizeof(struct rtentry));
1771                         if (err)
1772                                 return err;
1773                         memcpy_fromfs(&rt, arg, sizeof(struct rtentry));
1774                         return (cmd == SIOCDELRT) ? ip_rt_kill(&rt) : ip_rt_new(&rt);
1775         }
1776 
1777         return -EINVAL;
1778 }
1779 
/*
 *	Route advice hook.  Intentionally a no-op in this implementation:
 *	the cached route in *rp is left untouched regardless of the
 *	advice given.
 */

void ip_rt_advice(struct rtable **rp, int advice)
{
	/* Thanks! */
}
1785 

/* [previous][next][first][last][top][bottom][index][help] */