root/net/ipv4/route.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. rt_logmask
  2. rt_mask
  3. fz_hash_code
  4. fib_free_node
  5. fib_lookup_gateway
  6. fib_lookup_local
  7. fib_lookup
  8. get_gw_dev
  9. default_mask
  10. guess_mask
  11. bad_mask
  12. fib_del_list
  13. fib_del_1
  14. fib_create_info
  15. fib_add_1
  16. rt_flush_list
  17. fib_flush_1
  18. rt_get_info
  19. rt_cache_get_info
  20. rt_free
  21. rt_kick_free_queue
  22. ip_rt_run_bh
  23. ip_rt_check_expire
  24. rt_redirect_1
  25. rt_cache_flush
  26. rt_garbage_collect_1
  27. rt_req_enqueue
  28. rt_req_dequeue
  29. rt_kick_backlog
  30. rt_del
  31. rt_add
  32. ip_rt_flush
  33. ip_rt_redirect
  34. rt_garbage_collect
  35. rt_cache_add
  36. ip_rt_slow_route
  37. ip_rt_put
  38. ip_rt_route
  39. ip_rt_new
  40. ip_rt_kill
  41. ip_rt_ioctl
  42. ip_rt_advice

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              ROUTE - implementation of the IP router.
   7  *
   8  * Version:     @(#)route.c     1.0.14  05/31/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  13  *              Linus Torvalds, <Linus.Torvalds@helsinki.fi>
  14  *
  15  * Fixes:
  16  *              Alan Cox        :       Verify area fixes.
  17  *              Alan Cox        :       cli() protects routing changes
  18  *              Rui Oliveira    :       ICMP routing table updates
  19  *              (rco@di.uminho.pt)      Routing table insertion and update
  20  *              Linus Torvalds  :       Rewrote bits to be sensible
  21  *              Alan Cox        :       Added BSD route gw semantics
  22  *              Alan Cox        :       Super /proc >4K 
  23  *              Alan Cox        :       MTU in route table
  24  *              Alan Cox        :       MSS actually. Also added the window
  25  *                                      clamper.
  26  *              Sam Lantinga    :       Fixed route matching in rt_del()
  27  *              Alan Cox        :       Routing cache support.
  28  *              Alan Cox        :       Removed compatibility cruft.
  29  *              Alan Cox        :       RTF_REJECT support.
  30  *              Alan Cox        :       TCP irtt support.
  31  *              Jonathan Naylor :       Added Metric support.
  32  *      Miquel van Smoorenburg  :       BSD API fixes.
  33  *      Miquel van Smoorenburg  :       Metrics.
  34  *              Alan Cox        :       Use __u32 properly
  35  *              Alan Cox        :       Aligned routing errors more closely with BSD
  36  *                                      our system is still very different.
  37  *              Alan Cox        :       Faster /proc handling
  38  *      Alexey Kuznetsov        :       Massive rework to support tree based routing,
  39  *                                      routing caches and better behaviour.
  40  *              
  41  *              Olaf Erb        :       irtt wasn't being copied right.
  42  *              Bjorn Ekwall    :       Kerneld route support.
  43  *              Alan Cox        :       Multicast fixed (I hope)
  44  *
  45  *              This program is free software; you can redistribute it and/or
  46  *              modify it under the terms of the GNU General Public License
  47  *              as published by the Free Software Foundation; either version
  48  *              2 of the License, or (at your option) any later version.
  49  */
  50 
  51 #include <linux/config.h>
  52 #include <asm/segment.h>
  53 #include <asm/system.h>
  54 #include <asm/bitops.h>
  55 #include <linux/types.h>
  56 #include <linux/kernel.h>
  57 #include <linux/sched.h>
  58 #include <linux/mm.h>
  59 #include <linux/string.h>
  60 #include <linux/socket.h>
  61 #include <linux/sockios.h>
  62 #include <linux/errno.h>
  63 #include <linux/in.h>
  64 #include <linux/inet.h>
  65 #include <linux/netdevice.h>
  66 #include <linux/if_arp.h>
  67 #include <net/ip.h>
  68 #include <net/protocol.h>
  69 #include <net/route.h>
  70 #include <net/tcp.h>
  71 #include <linux/skbuff.h>
  72 #include <net/sock.h>
  73 #include <net/icmp.h>
  74 #include <net/netlink.h>
  75 #ifdef CONFIG_KERNELD
  76 #include <linux/kerneld.h>
  77 #endif
  78 
  79 /*
  80  * Forwarding Information Base definitions.
  81  */
  82 
  83 struct fib_node
  84 {
  85         struct fib_node         *fib_next;
  86         __u32                   fib_dst;
  87         unsigned long           fib_use;
  88         struct fib_info         *fib_info;
  89         short                   fib_metric;
  90         unsigned char           fib_tos;
  91 };
  92 
  93 /*
  94  * This structure contains data shared by many of routes.
  95  */     
  96 
  97 struct fib_info
  98 {
  99         struct fib_info         *fib_next;
 100         struct fib_info         *fib_prev;
 101         __u32                   fib_gateway;
 102         struct device           *fib_dev;
 103         int                     fib_refcnt;
 104         unsigned long           fib_window;
 105         unsigned short          fib_flags;
 106         unsigned short          fib_mtu;
 107         unsigned short          fib_irtt;
 108 };
 109 
 110 struct fib_zone
 111 {
 112         struct fib_zone *fz_next;
 113         struct fib_node **fz_hash_table;
 114         struct fib_node *fz_list;
 115         int             fz_nent;
 116         int             fz_logmask;
 117         __u32           fz_mask;
 118 };
 119 
 120 static struct fib_zone  *fib_zones[33];
 121 static struct fib_zone  *fib_zone_list;
 122 static struct fib_node  *fib_loopback = NULL;
 123 static struct fib_info  *fib_info_list;
 124 
 125 /*
 126  * Backlogging.
 127  */
 128 
 129 #define RT_BH_REDIRECT          0
 130 #define RT_BH_GARBAGE_COLLECT   1
 131 #define RT_BH_FREE              2
 132 
 133 struct rt_req
 134 {
 135         struct rt_req * rtr_next;
 136         struct device *dev;
 137         __u32 dst;
 138         __u32 gw;
 139         unsigned char tos;
 140 };
 141 
 142 int                     ip_rt_lock;
 143 unsigned                ip_rt_bh_mask;
 144 static struct rt_req    *rt_backlog;
 145 
 146 /*
 147  * Route cache.
 148  */
 149 
 150 struct rtable           *ip_rt_hash_table[RT_HASH_DIVISOR];
 151 static int              rt_cache_size;
 152 static struct rtable    *rt_free_queue;
 153 struct wait_queue       *rt_wait;
 154 
 155 static void rt_kick_backlog(void);
 156 static void rt_cache_add(unsigned hash, struct rtable * rth);
 157 static void rt_cache_flush(void);
 158 static void rt_garbage_collect_1(void);
 159 
 160 /* 
 161  * Evaluate mask length.
 162  */
 163 
 164 static __inline__ int rt_logmask(__u32 mask)
     /* [previous][next][first][last][top][bottom][index][help] */
 165 {
 166         if (!(mask = ntohl(mask)))
 167                 return 32;
 168         return ffz(~mask);
 169 }
 170 
 171 /* 
 172  * Create mask from length.
 173  */
 174 
 175 static __inline__ __u32 rt_mask(int logmask)
     /* [previous][next][first][last][top][bottom][index][help] */
 176 {
 177         if (logmask >= 32)
 178                 return 0;
 179         return htonl(~((1<<logmask)-1));
 180 }
 181 
/*
 * Hash a destination address into a zone's hash-table slot.
 * The host part (low logmask bits) is shifted out first so that
 * addresses differing only in host bits do not all collide.
 */
static __inline__ unsigned fz_hash_code(__u32 dst, int logmask)
{
        return ip_rt_hash_code(ntohl(dst)>>logmask);
}
 186 
 187 /*
 188  * Free FIB node.
 189  */
 190 
 191 static void fib_free_node(struct fib_node * f)
     /* [previous][next][first][last][top][bottom][index][help] */
 192 {
 193         struct fib_info * fi = f->fib_info;
 194         if (!--fi->fib_refcnt)
 195         {
 196 #if RT_CACHE_DEBUG >= 2
 197                 printk("fib_free_node: fi %08x/%s is free\n", fi->fib_gateway, fi->fib_dev->name);
 198 #endif
 199                 if (fi->fib_next)
 200                         fi->fib_next->fib_prev = fi->fib_prev;
 201                 if (fi->fib_prev)
 202                         fi->fib_prev->fib_next = fi->fib_next;
 203                 if (fi == fib_info_list)
 204                         fib_info_list = fi->fib_next;
 205         }
 206         kfree_s(f, sizeof(struct fib_node));
 207 }
 208 
 209 /*
 210  * Find gateway route by address.
 211  */
 212 
 213 static struct fib_node * fib_lookup_gateway(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 214 {
 215         struct fib_zone * fz;
 216         struct fib_node * f;
 217 
 218         for (fz = fib_zone_list; fz; fz = fz->fz_next) 
 219         {
 220                 if (fz->fz_hash_table)
 221                         f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
 222                 else
 223                         f = fz->fz_list;
 224                 
 225                 for ( ; f; f = f->fib_next)
 226                 {
 227                         if ((dst ^ f->fib_dst) & fz->fz_mask)
 228                                 continue;
 229                         if (f->fib_info->fib_flags & RTF_GATEWAY)
 230                                 return NULL;
 231                         return f;
 232                 }
 233         }
 234         return NULL;
 235 }
 236 
 237 /*
 238  * Find local route by address.
 239  * FIXME: I use "longest match" principle. If destination
 240  *        has some non-local route, I'll not search shorter matches.
 241  *        It's possible, I'm wrong, but I wanted to prevent following
 242  *        situation:
 243  *      route add 193.233.7.128 netmask 255.255.255.192 gw xxxxxx
 244  *      route add 193.233.7.0   netmask 255.255.255.0 eth1
 245  *        (Two ethernets connected by serial line, one is small and other is large)
 246  *        Host 193.233.7.129 is locally unreachable,
 247  *        but old (<=1.3.37) code will send packets destined for it to eth1.
 248  *
 249  */
 250 
 251 static struct fib_node * fib_lookup_local(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 252 {
 253         struct fib_zone * fz;
 254         struct fib_node * f;
 255 
 256         for (fz = fib_zone_list; fz; fz = fz->fz_next) 
 257         {
 258                 int longest_match_found = 0;
 259 
 260                 if (fz->fz_hash_table)
 261                         f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
 262                 else
 263                         f = fz->fz_list;
 264                 
 265                 for ( ; f; f = f->fib_next)
 266                 {
 267                         if ((dst ^ f->fib_dst) & fz->fz_mask)
 268                                 continue;
 269                         if (!(f->fib_info->fib_flags & RTF_GATEWAY))
 270                                 return f;
 271                         longest_match_found = 1;
 272                 }
 273                 if (longest_match_found)
 274                         return NULL;
 275         }
 276         return NULL;
 277 }
 278 
 279 /*
 280  * Main lookup routine.
 281  *      IMPORTANT NOTE: this algorithm has small difference from <=1.3.37 visible
 282  *      by user. It doesn't route non-CIDR broadcasts by default.
 283  *
 284  *      F.e.
 285  *              ifconfig eth0 193.233.7.65 netmask 255.255.255.192 broadcast 193.233.7.255
 286  *      is valid, but if you really are not able (not allowed, do not want) to
 287  *      use CIDR compliant broadcast 193.233.7.127, you should add host route:
 288  *              route add -host 193.233.7.255 eth0
 289  */
 290 
 291 static struct fib_node * fib_lookup(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 292 {
 293         struct fib_zone * fz;
 294         struct fib_node * f;
 295 
 296         for (fz = fib_zone_list; fz; fz = fz->fz_next) 
 297         {
 298                 if (fz->fz_hash_table)
 299                         f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
 300                 else
 301                         f = fz->fz_list;
 302                 
 303                 for ( ; f; f = f->fib_next)
 304                 {
 305                         if ((dst ^ f->fib_dst) & fz->fz_mask)
 306                                 continue;
 307                         return f;
 308                 }
 309         }
 310         return NULL;
 311 }
 312 
 313 static __inline__ struct device * get_gw_dev(__u32 gw)
     /* [previous][next][first][last][top][bottom][index][help] */
 314 {
 315         struct fib_node * f;
 316         f = fib_lookup_gateway(gw);
 317         if (f)
 318                 return f->fib_info->fib_dev;
 319         return NULL;
 320 }
 321 
 322 /*
 323  *      Used by 'rt_add()' when we can't get the netmask any other way..
 324  *
 325  *      If the lower byte or two are zero, we guess the mask based on the
 326  *      number of zero 8-bit net numbers, otherwise we use the "default"
 327  *      masks judging by the destination address and our device netmask.
 328  */
 329  
 330 static __u32 unsigned long default_mask(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 331 {
 332         dst = ntohl(dst);
 333         if (IN_CLASSA(dst))
 334                 return htonl(IN_CLASSA_NET);
 335         if (IN_CLASSB(dst))
 336                 return htonl(IN_CLASSB_NET);
 337         return htonl(IN_CLASSC_NET);
 338 }
 339 
 340 
 341 /*
 342  *      If no mask is specified then generate a default entry.
 343  */
 344 
 345 static __u32 guess_mask(__u32 dst, struct device * dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 346 {
 347         __u32 mask;
 348 
 349         if (!dst)
 350                 return 0;
 351         mask = default_mask(dst);
 352         if ((dst ^ dev->pa_addr) & mask)
 353                 return mask;
 354         return dev->pa_mask;
 355 }
 356 
 357 
 358 /*
 359  *      Check if a mask is acceptable.
 360  */
 361  
 362 static inline int bad_mask(__u32 mask, __u32 addr)
     /* [previous][next][first][last][top][bottom][index][help] */
 363 {
 364         if (addr & (mask = ~mask))
 365                 return 1;
 366         mask = ntohl(mask);
 367         if (mask & (mask+1))
 368                 return 1;
 369         return 0;
 370 }
 371 
 372 
/*
 * Delete matching routes from one list (flat zone list or one hash
 * chain).  dst must match exactly; gtw, metric and dev restrict the
 * match only when non-zero / >= 0 / non-NULL respectively.  Returns
 * the number of nodes removed so the caller can adjust fz_nent.
 */
static int fib_del_list(struct fib_node **fp, __u32 dst,
                struct device * dev, __u32 gtw, short flags, short metric, __u32 mask)
{
        struct fib_node *f;
        int found=0;

        while((f = *fp) != NULL) 
        {
                struct fib_info * fi = f->fib_info;

                /*
                 *      Make sure the destination and netmask match.
                 *      metric, gateway and device are also checked
                 *      if they were specified.
                 */
                if (f->fib_dst != dst ||
                    (gtw && fi->fib_gateway != gtw) ||
                    (metric >= 0 && f->fib_metric != metric) ||
                    (dev && fi->fib_dev != dev) )
                {
                        fp = &f->fib_next;
                        continue;
                }
                /* Unlink atomically w.r.t. interrupts before the
                   node is reported and freed. */
                cli();
                *fp = f->fib_next;
                if (fib_loopback == f)
                        fib_loopback = NULL;
                sti();
                ip_netlink_msg(RTMSG_DELROUTE, dst, gtw, mask, flags, metric, fi->fib_dev->name);
                fib_free_node(f);
                found++;
        }
        return found;
}
 407 
/*
 * Delete route(s) matching dst (and optional dev/gtw/metric).
 * With mask == 0 every zone is searched; otherwise only the zone
 * whose prefix length corresponds to the mask.  Returns 0 on
 * success, -ESRCH when nothing matched.  The route cache is
 * flushed whenever at least one route was removed.
 */
static __inline__ int fib_del_1(__u32 dst, __u32 mask,
                struct device * dev, __u32 gtw, short flags, short metric)
{
        struct fib_node **fp;
        struct fib_zone *fz;
        int found=0;

        if (!mask)
        {
                /* Wildcard mask: walk every zone. */
                for (fz=fib_zone_list; fz; fz = fz->fz_next)
                {
                        int tmp;
                        if (fz->fz_hash_table)
                                fp = &fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
                        else
                                fp = &fz->fz_list;

                        tmp = fib_del_list(fp, dst, dev, gtw, flags, metric, mask);
                        fz->fz_nent -= tmp;
                        found += tmp;
                }
        } 
        else
        {
                /* Specific mask: at most one zone can hold it. */
                if ((fz = fib_zones[rt_logmask(mask)]) != NULL)
                {
                        if (fz->fz_hash_table)
                                fp = &fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
                        else
                                fp = &fz->fz_list;
        
                        found = fib_del_list(fp, dst, dev, gtw, flags, metric, mask);
                        fz->fz_nent -= found;
                }
        }

        if (found)
        {
                rt_cache_flush();
                return 0;
        }
        return -ESRCH;
}
 451 
 452 
/*
 * Find or create the fib_info record shared by all routes with the
 * same gateway, device, flags, mss, window and irtt.  Returns NULL
 * only on allocation failure; otherwise the returned record's
 * reference count has been incremented on the caller's behalf.
 */
static struct fib_info * fib_create_info(__u32 gw, struct device * dev,
                                         unsigned short flags, unsigned short mss,
                                         unsigned long window, unsigned short irtt)
{
        struct fib_info * fi;

        /* Parameters whose flag is absent fall back to defaults. */
        if (!(flags & RTF_MSS))
        {
                mss = dev->mtu;
#ifdef CONFIG_NO_PATH_MTU_DISCOVERY
                /*
                 *      If MTU was not specified, use default.
                 *      If you want to increase MTU for some net (local subnet)
                 *      use "route add .... mss xxx".
                 *
                 *      The MTU isn't currently always used and computed as it
                 *      should be as far as I can tell. [Still verifying this is right]
                 */
                if ((flags & RTF_GATEWAY) && mss > 576)
                        mss = 576;
#endif
        }
        if (!(flags & RTF_WINDOW))
                window = 0;
        if (!(flags & RTF_IRTT))
                irtt = 0;

        /* Reuse an existing, fully identical record if possible. */
        for (fi=fib_info_list; fi; fi = fi->fib_next)
        {
                if (fi->fib_gateway != gw ||
                    fi->fib_dev != dev  ||
                    fi->fib_flags != flags ||
                    fi->fib_mtu != mss ||
                    fi->fib_window != window ||
                    fi->fib_irtt != irtt)
                        continue;
                fi->fib_refcnt++;
#if RT_CACHE_DEBUG >= 2
                printk("fib_create_info: fi %08x/%s is duplicate\n", fi->fib_gateway, fi->fib_dev->name);
#endif
                return fi;
        }
        /* No match: allocate a new record at the head of the list. */
        fi = (struct fib_info*)kmalloc(sizeof(struct fib_info), GFP_KERNEL);
        if (!fi)
                return NULL;
        memset(fi, 0, sizeof(struct fib_info));
        fi->fib_flags = flags;
        fi->fib_dev = dev;
        fi->fib_gateway = gw;
        fi->fib_mtu = mss;
        fi->fib_window = window;
        fi->fib_refcnt++;
        fi->fib_next = fib_info_list;
        fi->fib_prev = NULL;
        fi->fib_irtt = irtt;
        if (fib_info_list)
                fib_info_list->fib_prev = fi;
        fib_info_list = fi;
#if RT_CACHE_DEBUG >= 2
        printk("fib_create_info: fi %08x/%s is created\n", fi->fib_gateway, fi->fib_dev->name);
#endif
        return fi;
}
 516 
 517 
 518 static __inline__ void fib_add_1(short flags, __u32 dst, __u32 mask,
     /* [previous][next][first][last][top][bottom][index][help] */
 519         __u32 gw, struct device *dev, unsigned short mss,
 520         unsigned long window, unsigned short irtt, short metric)
 521 {
 522         struct fib_node *f, *f1;
 523         struct fib_node **fp;
 524         struct fib_node **dup_fp = NULL;
 525         struct fib_zone * fz;
 526         struct fib_info * fi;
 527         int logmask;
 528 
 529         if (flags & RTF_HOST) 
 530                 mask = 0xffffffff;
 531         /*
 532          * If mask is not specified, try to guess it.
 533          */
 534         else if (!mask)
 535         {
 536                 if (!((dst ^ dev->pa_addr) & dev->pa_mask)) 
 537                 {
 538                         mask = dev->pa_mask;
 539                         flags &= ~RTF_GATEWAY;
 540                         if (flags & RTF_DYNAMIC) 
 541                         {
 542                                 printk("Dynamic route to my own net rejected\n");
 543                                 return;
 544                         }
 545                 } 
 546                 else
 547                         mask = guess_mask(dst, dev);
 548                 dst &= mask;
 549         }
 550         
 551         /*
 552          *      A gateway must be reachable and not a local address
 553          */
 554          
 555         if (gw == dev->pa_addr)
 556                 flags &= ~RTF_GATEWAY;
 557                 
 558         if (flags & RTF_GATEWAY) 
 559         {
 560                 /*
 561                  *      Don't try to add a gateway we can't reach.. 
 562                  *      Tunnel devices are exempt from this rule.
 563                  */
 564                  
 565                 if ((dev != get_gw_dev(gw)) && dev->type!=ARPHRD_TUNNEL)
 566                         return;
 567                         
 568                 flags |= RTF_GATEWAY;
 569         } 
 570         else
 571                 gw = 0;
 572                 
 573         /*
 574          *      Allocate an entry and fill it in.
 575          */
 576          
 577         f = (struct fib_node *) kmalloc(sizeof(struct fib_node), GFP_KERNEL);
 578         if (f == NULL)
 579                 return;
 580 
 581         memset(f, 0, sizeof(struct fib_node));
 582         f->fib_dst = dst;
 583         f->fib_metric = metric;
 584         f->fib_tos    = 0;
 585 
 586         if  ((fi = fib_create_info(gw, dev, flags, mss, window, irtt)) == NULL)
 587         {
 588                 kfree_s(f, sizeof(struct fib_node));
 589                 return;
 590         }
 591         f->fib_info = fi;
 592 
 593         logmask = rt_logmask(mask);
 594         fz = fib_zones[logmask];
 595 
 596 
 597         if (!fz)
 598         {
 599                 int i;
 600                 fz = kmalloc(sizeof(struct fib_zone), GFP_KERNEL);
 601                 if (!fz)
 602                 {
 603                         fib_free_node(f);
 604                         return;
 605                 }
 606                 memset(fz, 0, sizeof(struct fib_zone));
 607                 fz->fz_logmask = logmask;
 608                 fz->fz_mask = mask;
 609                 for (i=logmask-1; i>=0; i--)
 610                         if (fib_zones[i])
 611                                 break;
 612                 cli();
 613                 if (i<0)
 614                 {
 615                         fz->fz_next = fib_zone_list;
 616                         fib_zone_list = fz;
 617                 }
 618                 else
 619                 {
 620                         fz->fz_next = fib_zones[i]->fz_next;
 621                         fib_zones[i]->fz_next = fz;
 622                 }
 623                 fib_zones[logmask] = fz;
 624                 sti();
 625         }
 626 
 627         /*
 628          * If zone overgrows RTZ_HASHING_LIMIT, create hash table.
 629          */
 630 
 631         if (fz->fz_nent >= RTZ_HASHING_LIMIT && !fz->fz_hash_table && logmask<32)
 632         {
 633                 struct fib_node ** ht;
 634 #if RT_CACHE_DEBUG
 635                 printk("fib_add_1: hashing for zone %d started\n", logmask);
 636 #endif
 637                 ht = kmalloc(RTZ_HASH_DIVISOR*sizeof(struct rtable*), GFP_KERNEL);
 638 
 639                 if (ht)
 640                 {
 641                         memset(ht, 0, RTZ_HASH_DIVISOR*sizeof(struct fib_node*));
 642                         cli();
 643                         f1 = fz->fz_list;
 644                         while (f1)
 645                         {
 646                                 struct fib_node * next;
 647                                 unsigned hash = fz_hash_code(f1->fib_dst, logmask);
 648                                 next = f1->fib_next;
 649                                 f1->fib_next = ht[hash];
 650                                 ht[hash] = f1;
 651                                 f1 = next;
 652                         }
 653                         fz->fz_list = NULL;
 654                         fz->fz_hash_table = ht; 
 655                         sti();
 656                 }
 657         }
 658 
 659         if (fz->fz_hash_table)
 660                 fp = &fz->fz_hash_table[fz_hash_code(dst, logmask)];
 661         else
 662                 fp = &fz->fz_list;
 663 
 664         /*
 665          * Scan list to find the first route with the same destination
 666          */
 667         while ((f1 = *fp) != NULL)
 668         {
 669                 if (f1->fib_dst == dst)
 670                         break;
 671                 fp = &f1->fib_next;
 672         }
 673 
 674         /*
 675          * Find route with the same destination and less (or equal) metric.
 676          */
 677         while ((f1 = *fp) != NULL && f1->fib_dst == dst)
 678         {
 679                 if (f1->fib_metric >= metric)
 680                         break;
 681                 /*
 682                  *      Record route with the same destination and gateway,
 683                  *      but less metric. We'll delete it 
 684                  *      after instantiation of new route.
 685                  */
 686                 if (f1->fib_info->fib_gateway == gw)
 687                         dup_fp = fp;
 688                 fp = &f1->fib_next;
 689         }
 690 
 691         /*
 692          * Is it already present?
 693          */
 694 
 695         if (f1 && f1->fib_metric == metric && f1->fib_info == fi)
 696         {
 697                 fib_free_node(f);
 698                 return;
 699         }
 700         
 701         /*
 702          * Insert new entry to the list.
 703          */
 704 
 705         cli();
 706         f->fib_next = f1;
 707         *fp = f;
 708         if (!fib_loopback && (fi->fib_dev->flags & IFF_LOOPBACK))
 709                 fib_loopback = f;
 710         sti();
 711         fz->fz_nent++;
 712         ip_netlink_msg(RTMSG_NEWROUTE, dst, gw, mask, flags, metric, fi->fib_dev->name);
 713 
 714         /*
 715          *      Delete route with the same destination and gateway.
 716          *      Note that we should have at most one such route.
 717          */
 718         if (dup_fp)
 719                 fp = dup_fp;
 720         else
 721                 fp = &f->fib_next;
 722 
 723         while ((f1 = *fp) != NULL && f1->fib_dst == dst)
 724         {
 725                 if (f1->fib_info->fib_gateway == gw)
 726                 {
 727                         cli();
 728                         *fp = f1->fib_next;
 729                         if (fib_loopback == f1)
 730                                 fib_loopback = NULL;
 731                         sti();
 732                         ip_netlink_msg(RTMSG_DELROUTE, dst, gw, mask, flags, metric, f1->fib_info->fib_dev->name);
 733                         fib_free_node(f1);
 734                         fz->fz_nent--;
 735                         break;
 736                 }
 737                 fp = &f1->fib_next;
 738         }
 739         rt_cache_flush();
 740         return;
 741 }
 742 
 743 static int rt_flush_list(struct fib_node ** fp, struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 744 {
 745         int found = 0;
 746         struct fib_node *f;
 747 
 748         while ((f = *fp) != NULL) {
 749                 if (f->fib_info->fib_dev != dev) {
 750                         fp = &f->fib_next;
 751                         continue;
 752                 }
 753                 cli();
 754                 *fp = f->fib_next;
 755                 if (fib_loopback == f)
 756                         fib_loopback = NULL;
 757                 sti();
 758                 fib_free_node(f);
 759                 found++;
 760         }
 761         return found;
 762 }
 763 
 764 static __inline__ void fib_flush_1(struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 765 {
 766         struct fib_zone *fz;
 767         int found = 0;
 768 
 769         for (fz = fib_zone_list; fz; fz = fz->fz_next)
 770         {
 771                 if (fz->fz_hash_table)
 772                 {
 773                         int i;
 774                         int tmp = 0;
 775                         for (i=0; i<RTZ_HASH_DIVISOR; i++)
 776                                 tmp += rt_flush_list(&fz->fz_hash_table[i], dev);
 777                         fz->fz_nent -= tmp;
 778                         found += tmp;
 779                 }
 780                 else
 781                 {
 782                         int tmp;
 783                         tmp = rt_flush_list(&fz->fz_list, dev);
 784                         fz->fz_nent -= tmp;
 785                         found += tmp;
 786                 }
 787         }
 788                 
 789         if (found)
 790                 rt_cache_flush();
 791 }
 792 
 793 
 794 /* 
 795  *      Called from the PROCfs module. This outputs /proc/net/route.
 796  *
 797  *      We preserve the old format but pad the buffers out. This means that
 798  *      we can spin over the other entries as we read them. Remember the
 799  *      gated BGP4 code could need to read 60,000+ routes on occasion (thats
 800  *      about 7Mb of data). To do that ok we will need to also cache the
 801  *      last route we got to (reads will generally be following on from
 802  *      one another without gaps).
 803  */
 804  
int rt_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
{
	struct fib_zone *fz;
	struct fib_node *f;
	int len=0;
	off_t pos=0;
	char temp[129];	/* one 127-char record + '\n' + NUL */
	int i;
	
	/* Every record (and the header) is exactly 128 bytes, so "offset"
	   maps directly onto a record count.  Start past the header. */
	pos = 128;

	if (offset<128)
	{
		sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT");
		len = 128;
	}
	
	/* Wait for any table writer to finish, then take the fast lock. */
	while  (ip_rt_lock)
		sleep_on(&rt_wait);
	ip_rt_fast_lock();

	for (fz=fib_zone_list; fz; fz = fz->fz_next)
	{
		int maxslot;
		struct fib_node ** fp;

		if (fz->fz_nent == 0)
			continue;

		/* All of this zone's records lie before the requested
		   window: skip it without formatting anything. */
		if (pos + 128*fz->fz_nent <= offset)
		{
			pos += 128*fz->fz_nent;
			len = 0;
			continue;
		}

		/* Hashed zones walk every bucket; small zones keep a
		   single linear list. */
		if (fz->fz_hash_table)
		{
			maxslot = RTZ_HASH_DIVISOR;
			fp	= fz->fz_hash_table;
		}
		else
		{
			maxslot = 1;
			fp	= &fz->fz_list;
		}
			
		for (i=0; i < maxslot; i++, fp++)
		{
			
			for (f = *fp; f; f = f->fib_next) 
			{
				struct fib_info * fi;
				/*
				 *	Spin through entries until we are ready
				 */
				pos += 128;

				if (pos <= offset)
				{
					len=0;
					continue;
				}
					
				fi = f->fib_info;
				sprintf(temp, "%s\t%08lX\t%08lX\t%02X\t%d\t%lu\t%d\t%08lX\t%d\t%lu\t%u",
					fi->fib_dev->name, (unsigned long)f->fib_dst, (unsigned long)fi->fib_gateway,
					fi->fib_flags, 0, f->fib_use, f->fib_metric,
					(unsigned long)fz->fz_mask, (int)fi->fib_mtu, fi->fib_window, (int)fi->fib_irtt);
				sprintf(buffer+len,"%-127s\n",temp);

				len += 128;
				if (pos >= offset+length)
					goto done;
			}
		}
	}

done:
	ip_rt_unlock();
	wake_up(&rt_wait);
	
	/* Tell procfs where the requested window begins in buffer. */
	*start = buffer+len-(pos-offset);
	len = pos - offset;
	if (len>length)
		len = length;
	return len;
}
 893 
/*
 *	/proc/net/rt_cache read handler: dump the routing cache hash
 *	table, using the same fixed 128-byte-per-record layout (and the
 *	same *start/offset windowing scheme) as rt_get_info() above.
 */
int rt_cache_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
{
	int len=0;
	off_t pos=0;
	char temp[129];	/* one 127-char record + '\n' + NUL */
	struct rtable *r;
	int i;

	/* Account for the 128-byte header line. */
	pos = 128;

	if (offset<128)
	{
		sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tSource\t\tMTU\tWindow\tIRTT\tHH\tARP");
		len = 128;
	}
	
	
	/* Wait for any table writer to finish, then take the fast lock. */
	while  (ip_rt_lock)
		sleep_on(&rt_wait);
	ip_rt_fast_lock();

	for (i = 0; i<RT_HASH_DIVISOR; i++)
	{
		for (r = ip_rt_hash_table[i]; r; r = r->rt_next) 
		{
			/*
			 *	Spin through entries until we are ready
			 */
			pos += 128;

			if (pos <= offset)
			{
				len = 0;
				continue;
			}
					
			sprintf(temp, "%s\t%08lX\t%08lX\t%02X\t%d\t%u\t%d\t%08lX\t%d\t%lu\t%u\t%d\t%1d",
				r->rt_dev->name, (unsigned long)r->rt_dst, (unsigned long)r->rt_gateway,
				r->rt_flags, r->rt_refcnt, r->rt_use, 0,
				(unsigned long)r->rt_src, (int)r->rt_mtu, r->rt_window, (int)r->rt_irtt, r->rt_hh ? r->rt_hh->hh_refcnt : -1, r->rt_hh ? r->rt_hh->hh_uptodate : 0);
			sprintf(buffer+len,"%-127s\n",temp);
			len += 128;
			if (pos >= offset+length)
				goto done;
		}
	}

done:
	ip_rt_unlock();
	wake_up(&rt_wait);
	
	/* Tell procfs where the requested window begins in buffer. */
	*start = buffer+len-(pos-offset);
	len = pos-offset;
	if (len>length)
		len = length;
	return len;
}
 951 
 952 
/*
 *	Release a cache entry.  If nobody holds a reference it (and its
 *	hardware-header cache entry, when this was the last user) is
 *	freed immediately; otherwise it is parked on rt_free_queue with
 *	RTF_UP cleared and the RT_BH_FREE bottom half reclaims it once
 *	the reference count drops to zero.
 */
static void rt_free(struct rtable * rt)
{
	unsigned long flags;

	save_flags(flags);
	cli();
	if (!rt->rt_refcnt)
	{
		struct hh_cache * hh = rt->rt_hh;
		rt->rt_hh = NULL;
		if (hh && !--hh->hh_refcnt)
		{
			/* Re-enable interrupts before the (slow) free. */
			restore_flags(flags);
			kfree_s(hh, sizeof(struct hh_cache));
		}
		restore_flags(flags);
		kfree_s(rt, sizeof(struct rt_table));
		return;
	}
	/* Still referenced: defer freeing to the bottom half. */
	rt->rt_next = rt_free_queue;
	rt->rt_flags &= ~RTF_UP;
	rt_free_queue = rt;
	ip_rt_bh_mask |= RT_BH_FREE;
#if RT_CACHE_DEBUG >= 2
	printk("rt_free: %08x\n", rt->rt_dst);
#endif
	restore_flags(flags);
}
 981 
/*
 * RT "bottom half" handlers. Called with masked interrupts.
 */
 985 
/*
 *	RT_BH_FREE handler: walk the deferred-free queue and release
 *	every entry whose last reference has gone.  Entered with
 *	interrupts masked; they are briefly re-enabled around each
 *	kfree_s() and re-masked before the list walk resumes.
 */
static __inline__ void rt_kick_free_queue(void)
{
	struct rtable *rt, **rtp;

	rtp = &rt_free_queue;

	while ((rt = *rtp) != NULL)
	{
		if  (!rt->rt_refcnt)
		{
			struct hh_cache * hh = rt->rt_hh;
#if RT_CACHE_DEBUG >= 2
			__u32 daddr = rt->rt_dst;
#endif
			/* Unlink while interrupts are still masked. */
			*rtp = rt->rt_next;
			rt->rt_hh = NULL;
			if (hh && !--hh->hh_refcnt)
			{
				sti();
				kfree_s(hh, sizeof(struct hh_cache));
			}
			sti();
			kfree_s(rt, sizeof(struct rt_table));
#if RT_CACHE_DEBUG >= 2
			printk("rt_kick_free_queue: %08x is free\n", daddr);
#endif
			cli();
			/* *rtp was already advanced by the unlink above. */
			continue;
		}
		rtp = &rt->rt_next;
	}
}
1018 
/*
 *	Routing bottom half: run whatever deferred work is flagged in
 *	ip_rt_bh_mask (queued redirects, garbage collection, deferred
 *	frees) — but only when nobody currently holds the routing lock.
 */
void ip_rt_run_bh() {
	unsigned long flags;
	save_flags(flags);
	cli();
	if (ip_rt_bh_mask && !ip_rt_lock)
	{
		if (ip_rt_bh_mask & RT_BH_REDIRECT)
			rt_kick_backlog();

		if (ip_rt_bh_mask & RT_BH_GARBAGE_COLLECT)
		{
			ip_rt_fast_lock();
			ip_rt_bh_mask &= ~RT_BH_GARBAGE_COLLECT;
			/* GC can take a while: run it with interrupts on. */
			sti();
			rt_garbage_collect_1();
			cli();
			ip_rt_fast_unlock();
		}

		if (ip_rt_bh_mask & RT_BH_FREE)
			rt_kick_free_queue();
	}
	restore_flags(flags);
}
1043 
1044 
/*
 *	Periodic (timer-driven) cache maintenance: drop unreferenced
 *	entries idle for more than RT_CACHE_TIMEOUT jiffies and bubble
 *	more recently / more heavily used entries towards the front of
 *	their hash chains.  The whole pass is skipped if someone else
 *	already holds the routing lock (ip_rt_lock != 1 after our fast
 *	lock).
 */
void ip_rt_check_expire()
{
	ip_rt_fast_lock();
	if (ip_rt_lock == 1)
	{
		int i;
		struct rtable *rth, **rthp;
		unsigned long flags;
		unsigned long now = jiffies;

		save_flags(flags);
		for (i=0; i<RT_HASH_DIVISOR; i++)
		{
			rthp = &ip_rt_hash_table[i];

			while ((rth = *rthp) != NULL)
			{
				struct rtable * rth_next = rth->rt_next;

				/*
				 * Cleanup aged off entries.
				 */

				cli();
				if (!rth->rt_refcnt && rth->rt_lastuse + RT_CACHE_TIMEOUT < now)
				{
					*rthp = rth_next;
					sti();
					rt_cache_size--;
#if RT_CACHE_DEBUG >= 2
					printk("rt_check_expire clean %02x@%08x\n", i, rth->rt_dst);
#endif
					rt_free(rth);
					continue;
				}
				sti();

				if (!rth_next)
					break;

				/*
				 * LRU ordering.
				 */

				if (rth->rt_lastuse + RT_CACHE_BUBBLE_THRESHOULD < rth_next->rt_lastuse ||
				    (rth->rt_lastuse < rth_next->rt_lastuse &&
				     rth->rt_use < rth_next->rt_use))
				{
#if RT_CACHE_DEBUG >= 2
					printk("rt_check_expire bubbled %02x@%08x<->%08x\n", i, rth->rt_dst, rth_next->rt_dst);
#endif
					/* Swap rth and rth_next in the chain. */
					cli();
					*rthp = rth_next;
					rth->rt_next = rth_next->rt_next;
					rth_next->rt_next = rth;
					sti();
					rthp = &rth_next->rt_next;
					continue;
				}
				rthp = &rth->rt_next;
			}
		}
		restore_flags(flags);
		rt_kick_free_queue();
	}
	ip_rt_unlock();
}
1112 
1113 static void rt_redirect_1(__u32 dst, __u32 gw, struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
1114 {
1115         struct rtable *rt;
1116         unsigned long hash = ip_rt_hash_code(dst);
1117 
1118         if (gw == dev->pa_addr)
1119                 return;
1120         if (dev != get_gw_dev(gw))
1121                 return;
1122         rt = (struct rtable *) kmalloc(sizeof(struct rtable), GFP_ATOMIC);
1123         if (rt == NULL) 
1124                 return;
1125         memset(rt, 0, sizeof(struct rtable));
1126         rt->rt_flags = RTF_DYNAMIC | RTF_MODIFIED | RTF_HOST | RTF_GATEWAY | RTF_UP;
1127         rt->rt_dst = dst;
1128         rt->rt_dev = dev;
1129         rt->rt_gateway = gw;
1130         rt->rt_src = dev->pa_addr;
1131         rt->rt_mtu = dev->mtu;
1132 #ifdef CONFIG_NO_PATH_MTU_DISCOVERY
1133         if (dev->mtu > 576)
1134                 rt->rt_mtu = 576;
1135 #endif
1136         rt->rt_lastuse  = jiffies;
1137         rt->rt_refcnt  = 1;
1138         rt_cache_add(hash, rt);
1139         ip_rt_put(rt);
1140         return;
1141 }
1142 
/*
 *	Empty the whole routing cache.  Each hash chain is detached with
 *	interrupts masked, then its entries are handed to rt_free() with
 *	interrupts enabled again (rt_free defers entries that are still
 *	referenced).
 */
static void rt_cache_flush(void)
{
	int i;
	struct rtable * rth, * next;

	for (i=0; i<RT_HASH_DIVISOR; i++)
	{
		int nr=0;

		cli();
		if (!(rth = ip_rt_hash_table[i]))
		{
			sti();
			continue;
		}

		/* Detach the chain atomically, then free at leisure. */
		ip_rt_hash_table[i] = NULL;
		sti();

		for (; rth; rth=next)
		{
			next = rth->rt_next;
			rt_cache_size--;
			nr++;
			rth->rt_next = NULL;
			rt_free(rth);
		}
#if RT_CACHE_DEBUG >= 2
		if (nr > 0)
			printk("rt_cache_flush: %d@%02x\n", nr, i);
#endif
	}
#if RT_CACHE_DEBUG >= 1
	if (rt_cache_size)
	{
		printk("rt_cache_flush: bug rt_cache_size=%d\n", rt_cache_size);
		rt_cache_size = 0;
	}
#endif
}
1183 
/*
 *	Shrink the routing cache below RT_CACHE_SIZE_MAX.  Starts with a
 *	half-timeout expiry horizon and keeps halving it (becoming more
 *	aggressive) until enough entries have been reclaimed.  Entries
 *	with live references get a proportionally longer grace period,
 *	and at most one entry per chain is dropped per pass.
 */
static void rt_garbage_collect_1(void)
{
	int i;
	unsigned expire = RT_CACHE_TIMEOUT>>1;
	struct rtable * rth, **rthp;
	unsigned long now = jiffies;

	for (;;)
	{
		for (i=0; i<RT_HASH_DIVISOR; i++)
		{
			if (!ip_rt_hash_table[i])
				continue;
			for (rthp=&ip_rt_hash_table[i]; (rth=*rthp); rthp=&rth->rt_next)
			{
				if (rth->rt_lastuse + expire*(rth->rt_refcnt+1) > now)
					continue;
				rt_cache_size--;
				cli();
				*rthp=rth->rt_next;
				rth->rt_next = NULL;
				sti();
				rt_free(rth);
				break;
			}
		}
		if (rt_cache_size < RT_CACHE_SIZE_MAX)
			return;
		/* Not enough freed: halve the horizon and retry. */
		expire >>= 1;
	}
}
1215 
1216 static __inline__ void rt_req_enqueue(struct rt_req **q, struct rt_req *rtr)
     /* [previous][next][first][last][top][bottom][index][help] */
1217 {
1218         unsigned long flags;
1219         struct rt_req * tail;
1220 
1221         save_flags(flags);
1222         cli();
1223         tail = *q;
1224         if (!tail)
1225                 rtr->rtr_next = rtr;
1226         else
1227         {
1228                 rtr->rtr_next = tail->rtr_next;
1229                 tail->rtr_next = rtr;
1230         }
1231         *q = rtr;
1232         restore_flags(flags);
1233         return;
1234 }
1235 
1236 /*
1237  * Caller should mask interrupts.
1238  */
1239 
1240 static __inline__ struct rt_req * rt_req_dequeue(struct rt_req **q)
     /* [previous][next][first][last][top][bottom][index][help] */
1241 {
1242         struct rt_req * rtr;
1243 
1244         if (*q)
1245         {
1246                 rtr = (*q)->rtr_next;
1247                 (*q)->rtr_next = rtr->rtr_next;
1248                 if (rtr->rtr_next == rtr)
1249                         *q = NULL;
1250                 rtr->rtr_next = NULL;
1251                 return rtr;
1252         }
1253         return NULL;
1254 }
1255 
1256 /*
1257    Called with masked interrupts
1258  */
1259 
/*
 *	RT_BH_REDIRECT handler: apply ICMP redirects that were queued
 *	while the routing cache was busy.  Called with interrupts
 *	masked; they are re-enabled around each rt_redirect_1() call.
 *	Does nothing if the routing lock is held.
 */
static void rt_kick_backlog()
{
	if (!ip_rt_lock)
	{
		struct rt_req * rtr;

		ip_rt_fast_lock();

		while ((rtr = rt_req_dequeue(&rt_backlog)) != NULL)
		{
			sti();
			rt_redirect_1(rtr->dst, rtr->gw, rtr->dev);
			kfree_s(rtr, sizeof(struct rt_req));
			cli();
		}

		ip_rt_bh_mask &= ~RT_BH_REDIRECT;

		ip_rt_fast_unlock();
	}
}
1281 
1282 /*
1283  * rt_{del|add|flush} called only from USER process. Waiting is OK.
1284  */
1285 
1286 static int rt_del(__u32 dst, __u32 mask,
     /* [previous][next][first][last][top][bottom][index][help] */
1287                 struct device * dev, __u32 gtw, short rt_flags, short metric)
1288 {
1289         int retval;
1290 
1291         while (ip_rt_lock)
1292                 sleep_on(&rt_wait);
1293         ip_rt_fast_lock();
1294         retval = fib_del_1(dst, mask, dev, gtw, rt_flags, metric);
1295         ip_rt_unlock();
1296         wake_up(&rt_wait);
1297         return retval;
1298 }
1299 
1300 static void rt_add(short flags, __u32 dst, __u32 mask,
     /* [previous][next][first][last][top][bottom][index][help] */
1301         __u32 gw, struct device *dev, unsigned short mss,
1302         unsigned long window, unsigned short irtt, short metric)
1303 {
1304         while (ip_rt_lock)
1305                 sleep_on(&rt_wait);
1306         ip_rt_fast_lock();
1307         fib_add_1(flags, dst, mask, gw, dev, mss, window, irtt, metric);
1308         ip_rt_unlock();
1309         wake_up(&rt_wait);
1310 }
1311 
1312 void ip_rt_flush(struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
1313 {
1314         while (ip_rt_lock)
1315                 sleep_on(&rt_wait);
1316         ip_rt_fast_lock();
1317         fib_flush_1(dev);
1318         ip_rt_unlock();
1319         wake_up(&rt_wait);
1320 }
1321 
1322 /*
1323    Called by ICMP module.
1324  */
1325 
/*
 *	Called by the ICMP module on receipt of a redirect for dst.
 *	The redirect is honoured only if it came from the gateway we
 *	currently use for dst, arrived on the expected device, names a
 *	gateway on a directly attached network, and that gateway is not
 *	one of our own addresses.  Applied immediately when the cache is
 *	quiescent, otherwise queued for the bottom half.
 */
void ip_rt_redirect(__u32 src, __u32 dst, __u32 gw, struct device *dev)
{
	struct rt_req * rtr;
	struct rtable * rt;

	rt = ip_rt_route(dst, 0);
	if (!rt)
		return;

	if (rt->rt_gateway != src ||
	    rt->rt_dev != dev ||
	    ((gw^dev->pa_addr)&dev->pa_mask) ||	/* gw not on our subnet */
	    ip_chk_addr(gw))			/* gw is a local address */
	{
		ip_rt_put(rt);
		return;
	}
	ip_rt_put(rt);

	ip_rt_fast_lock();
	if (ip_rt_lock == 1)
	{
		/* Sole lock holder: update the cache directly. */
		rt_redirect_1(dst, gw, dev);
		ip_rt_unlock();
		return;
	}

	/* Cache busy: queue the request for ip_rt_run_bh(). */
	rtr = kmalloc(sizeof(struct rt_req), GFP_ATOMIC);
	if (rtr)
	{
		rtr->dst = dst;
		rtr->gw = gw;
		rtr->dev = dev;
		rt_req_enqueue(&rt_backlog, rtr);
		ip_rt_bh_mask |= RT_BH_REDIRECT;
	}
	ip_rt_unlock();
}
1364 
1365 
1366 static __inline__ void rt_garbage_collect(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1367 {
1368         if (ip_rt_lock == 1)
1369         {
1370                 rt_garbage_collect_1();
1371                 return;
1372         }
1373         ip_rt_bh_mask |= RT_BH_GARBAGE_COLLECT;
1374 }
1375 
/*
 *	Insert rth at the head of cache chain "hash".  If the device
 *	supports hardware-header caching, bind (or share) an hh_cache
 *	entry first.  While scanning the rest of the chain, older
 *	duplicates of the same destination and timed-out unreferenced
 *	entries are purged.  Caller holds the routing lock
 *	(ip_rt_lock == 1).
 */
static void rt_cache_add(unsigned hash, struct rtable * rth)
{
	unsigned long	flags;
	struct rtable	**rthp;
	__u32		daddr = rth->rt_dst;
	unsigned long	now = jiffies;

#if RT_CACHE_DEBUG >= 2
	if (ip_rt_lock != 1)
	{
		printk("rt_cache_add: ip_rt_lock==%d\n", ip_rt_lock);
		return;
	}
#endif

	save_flags(flags);

	if (rth->rt_dev->header_cache_bind)
	{
		struct rtable * rtg = rth;

		/* Gatewayed route: the MAC header belongs to the gateway,
		   so look up (and briefly unlock for) its cache entry. */
		if (rth->rt_gateway != daddr)
		{
			ip_rt_fast_unlock();
			rtg = ip_rt_route(rth->rt_gateway, 0);
			ip_rt_fast_lock();
		}

		if (rtg)
		{
			if (rtg == rth)
				rtg->rt_dev->header_cache_bind(&rtg->rt_hh, rtg->rt_dev, ETH_P_IP, rtg->rt_dst);
			else
			{
				/* Share the gateway route's hh entry. */
				if (rtg->rt_hh)
					ATOMIC_INCR(&rtg->rt_hh->hh_refcnt);
				rth->rt_hh = rtg->rt_hh;
				ip_rt_put(rtg);
			}
		}
	}

	if (rt_cache_size >= RT_CACHE_SIZE_MAX)
		rt_garbage_collect();

	cli();
	rth->rt_next = ip_rt_hash_table[hash];
#if RT_CACHE_DEBUG >= 2
	if (rth->rt_next)
	{
		struct rtable * trth;
		printk("rt_cache @%02x: %08x", hash, daddr);
		for (trth=rth->rt_next; trth; trth=trth->rt_next)
			printk(" . %08x", trth->rt_dst);
		printk("\n");
	}
#endif
	ip_rt_hash_table[hash] = rth;
	rthp = &rth->rt_next;
	sti();
	rt_cache_size++;

	/*
	 * Cleanup duplicate (and aged off) entries.
	 */

	while ((rth = *rthp) != NULL)
	{

		cli();
		if ((!rth->rt_refcnt && rth->rt_lastuse + RT_CACHE_TIMEOUT < now)
		    || rth->rt_dst == daddr)
		{
			*rthp = rth->rt_next;
			rt_cache_size--;
			sti();
#if RT_CACHE_DEBUG >= 2
			printk("rt_cache clean %02x@%08x\n", hash, rth->rt_dst);
#endif
			rt_free(rth);
			continue;
		}
		sti();
		rthp = &rth->rt_next;
	}
	restore_flags(flags);
}
1463 
1464 /*
1465    RT should be already locked.
1466    
1467    We could improve this by keeping a chain of say 32 struct rtable's
1468    last freed for fast recycling.
1469    
1470  */
1471 
/*
 *	Cache-miss path: consult the FIB, build a new cache entry for
 *	daddr and — lock permitting — insert it into the cache.
 *	Returns the entry with one reference held for the caller, or
 *	NULL on failure.  Called with the routing lock held; always
 *	unlocks before returning.
 */
struct rtable * ip_rt_slow_route (__u32 daddr, int local)
{
	unsigned hash = ip_rt_hash_code(daddr)^local;
	struct rtable * rth;
	struct fib_node * f;
	struct fib_info * fi;
	__u32 saddr;

#if RT_CACHE_DEBUG >= 2
	printk("rt_cache miss @%08x\n", daddr);
#endif

	rth = kmalloc(sizeof(struct rtable), GFP_ATOMIC);
	if (!rth)
	{
		ip_rt_unlock();
		return NULL;
	}

	if (local)
		f = fib_lookup_local(daddr);
	else
		f = fib_lookup (daddr);

	if (f)
	{
		fi = f->fib_info;
		f->fib_use++;
	}

	/* No route, or a deliberate reject entry: fail the lookup.
	   (fi is only read when f is non-NULL — short-circuit.) */
	if (!f || (fi->fib_flags & RTF_REJECT))
	{
#ifdef CONFIG_KERNELD	
		char wanted_route[20];
#endif		
#if RT_CACHE_DEBUG >= 2
		printk("rt_route failed @%08x\n", daddr);
#endif
		ip_rt_unlock();
		kfree_s(rth, sizeof(struct rtable));
#ifdef CONFIG_KERNELD		
		daddr=ntohl(daddr);
		sprintf(wanted_route, "%d.%d.%d.%d",
			(int)(daddr >> 24) & 0xff, (int)(daddr >> 16) & 0xff,
			(int)(daddr >> 8) & 0xff, (int)daddr & 0xff);
		kerneld_route(wanted_route);	/* Dynamic route request */
#endif		
		return NULL;
	}

	saddr = fi->fib_dev->pa_addr;

	/* Packets to one of our own addresses go via loopback. */
	if (daddr == fi->fib_dev->pa_addr)
	{
		f->fib_use--;
		if ((f = fib_loopback) != NULL)
		{
			f->fib_use++;
			fi = f->fib_info;
		}
	}
	
	if (!f)
	{
		ip_rt_unlock();
		kfree_s(rth, sizeof(struct rtable));
		return NULL;
	}

	rth->rt_dst	= daddr;
	rth->rt_src	= saddr;
	rth->rt_lastuse	= jiffies;
	rth->rt_refcnt	= 1;
	rth->rt_use	= 1;
	rth->rt_next	= NULL;
	rth->rt_hh	= NULL;
	rth->rt_gateway	= fi->fib_gateway;
	rth->rt_dev	= fi->fib_dev;
	rth->rt_mtu	= fi->fib_mtu;
	rth->rt_window	= fi->fib_window;
	rth->rt_irtt	= fi->fib_irtt;
	rth->rt_tos	= f->fib_tos;
	rth->rt_flags   = fi->fib_flags | RTF_HOST;
	if (local)
		rth->rt_flags   |= RTF_LOCAL;

	if (!(rth->rt_flags & RTF_GATEWAY))
		rth->rt_gateway = rth->rt_dst;
	/*
	 *	Multicast is never gatewayed.
	 */
	if (MULTICAST(daddr))
		rth->rt_gateway = rth->rt_dst;

	/* Only cache the entry if we are still the sole lock holder. */
	if (ip_rt_lock == 1)
		rt_cache_add(hash, rth);
	else
	{
		rt_free(rth);
#if RT_CACHE_DEBUG >= 1
		printk("rt_cache: route to %08x was born dead\n", daddr);
#endif
	}

	ip_rt_unlock();
	return rth;
}
1579 
1580 void ip_rt_put(struct rtable * rt)
     /* [previous][next][first][last][top][bottom][index][help] */
1581 {
1582         if (rt)
1583                 ATOMIC_DECR(&rt->rt_refcnt);
1584 }
1585 
1586 struct rtable * ip_rt_route(__u32 daddr, int local)
     /* [previous][next][first][last][top][bottom][index][help] */
1587 {
1588         struct rtable * rth;
1589 
1590         ip_rt_fast_lock();
1591 
1592         for (rth=ip_rt_hash_table[ip_rt_hash_code(daddr)^local]; rth; rth=rth->rt_next)
1593         {
1594                 if (rth->rt_dst == daddr)
1595                 {
1596                         rth->rt_lastuse = jiffies;
1597                         ATOMIC_INCR(&rth->rt_use);
1598                         ATOMIC_INCR(&rth->rt_refcnt);
1599                         ip_rt_unlock();
1600                         return rth;
1601                 }
1602         }
1603         return ip_rt_slow_route (daddr, local);
1604 }
1605 
1606 /*
1607  *      Process a route add request from the user, or from a kernel
1608  *      task.
1609  */
1610  
int ip_rt_new(struct rtentry *r)
{
	int err;
	char * devname;
	struct device * dev = NULL;
	unsigned long flags;
	__u32 daddr, mask, gw;
	short metric;

	/*
	 *	If a device is specified find it.
	 */
	 
	if ((devname = r->rt_dev) != NULL) 
	{
		/* devname is a userspace pointer: copy it in, look the
		   device up, and release the kernel copy again. */
		err = getname(devname, &devname);
		if (err)
			return err;
		dev = dev_get(devname);
		putname(devname);
		if (!dev)
			return -ENODEV;
	}
	
	/*
	 *	If the device isn't INET, don't allow it
	 */

	if (r->rt_dst.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/*
	 *	Make local copies of the important bits
	 *	We decrement the metric by one for BSD compatibility.
	 */
	 
	flags = r->rt_flags;
	daddr = (__u32) ((struct sockaddr_in *) &r->rt_dst)->sin_addr.s_addr;
	mask  = (__u32) ((struct sockaddr_in *) &r->rt_genmask)->sin_addr.s_addr;
	gw    = (__u32) ((struct sockaddr_in *) &r->rt_gateway)->sin_addr.s_addr;
	metric = r->rt_metric > 0 ? r->rt_metric - 1 : 0;

	/*
	 *	BSD emulation: Permits route add someroute gw one-of-my-addresses
	 *	to indicate which iface. Not as clean as the nice Linux dev technique
	 *	but people keep using it...  (and gated likes it ;))
	 */
	 
	if (!dev && (flags & RTF_GATEWAY)) 
	{
		struct device *dev2;
		for (dev2 = dev_base ; dev2 != NULL ; dev2 = dev2->next) 
		{
			if ((dev2->flags & IFF_UP) && dev2->pa_addr == gw) 
			{
				/* "Gateway" is actually a local address:
				   treat it as a direct route via dev2. */
				flags &= ~RTF_GATEWAY;
				dev = dev2;
				break;
			}
		}
	}

	/*
	 *	Ignore faulty masks
	 */
	 
	if (bad_mask(mask, daddr))
		mask=0;

	/*
	 *	Set the mask to nothing for host routes.
	 */
	 
	if (flags & RTF_HOST)
		mask = 0xffffffff;
	else if (mask && r->rt_genmask.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/*
	 *	You can only gateway IP via IP..
	 */
	 
	if (flags & RTF_GATEWAY) 
	{
		if (r->rt_gateway.sa_family != AF_INET)
			return -EAFNOSUPPORT;
		/* No explicit device: derive it from the gateway route. */
		if (!dev)
			dev = get_gw_dev(gw);
	} 
	else if (!dev)
		dev = ip_dev_check(daddr);

	/*
	 *	Unknown device.
	 */
	 
	if (dev == NULL)
		return -ENETUNREACH;

	/*
	 *	Add the route
	 */

	rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window, r->rt_irtt, metric);
	return 0;
}
1717 
1718 
1719 /*
1720  *      Remove a route, as requested by the user.
1721  */
1722 
1723 int ip_rt_kill(struct rtentry *r)
     /* [previous][next][first][last][top][bottom][index][help] */
1724 {
1725         struct sockaddr_in *trg;
1726         struct sockaddr_in *msk;
1727         struct sockaddr_in *gtw;
1728         char *devname;
1729         int err;
1730         struct device * dev = NULL;
1731 
1732         trg = (struct sockaddr_in *) &r->rt_dst;
1733         msk = (struct sockaddr_in *) &r->rt_genmask;
1734         gtw = (struct sockaddr_in *) &r->rt_gateway;
1735         if ((devname = r->rt_dev) != NULL) 
1736         {
1737                 err = getname(devname, &devname);
1738                 if (err)
1739                         return err;
1740                 dev = dev_get(devname);
1741                 putname(devname);
1742                 if (!dev)
1743                         return -ENODEV;
1744         }
1745         /*
1746          * metric can become negative here if it wasn't filled in
1747          * but that's a fortunate accident; we really use that in rt_del.
1748          */
1749         err=rt_del((__u32)trg->sin_addr.s_addr, (__u32)msk->sin_addr.s_addr, dev,
1750                 (__u32)gtw->sin_addr.s_addr, r->rt_flags, r->rt_metric - 1);
1751         return err;
1752 }
1753 
1754 /*
1755  *      Handle IP routing ioctl calls. These are used to manipulate the routing tables
1756  */
1757  
1758 int ip_rt_ioctl(unsigned int cmd, void *arg)
     /* [previous][next][first][last][top][bottom][index][help] */
1759 {
1760         int err;
1761         struct rtentry rt;
1762 
1763         switch(cmd) 
1764         {
1765                 case SIOCADDRT:         /* Add a route */
1766                 case SIOCDELRT:         /* Delete a route */
1767                         if (!suser())
1768                                 return -EPERM;
1769                         err=verify_area(VERIFY_READ, arg, sizeof(struct rtentry));
1770                         if (err)
1771                                 return err;
1772                         memcpy_fromfs(&rt, arg, sizeof(struct rtentry));
1773                         return (cmd == SIOCDELRT) ? ip_rt_kill(&rt) : ip_rt_new(&rt);
1774         }
1775 
1776         return -EINVAL;
1777 }
1778 
/*
 *	Accept advice about a cached route from an upper layer.
 *	This implementation simply ignores it — both arguments are
 *	deliberately unused.
 */
void ip_rt_advice(struct rtable **rp, int advice)
{
	/* Thanks, but no action is taken. */
}
1784 

/* [previous][next][first][last][top][bottom][index][help] */